Remove vp8, vp9 folders
Change-Id: I09b8acd22d031ece52e1fee18b998349bf1cf06b
configure (vendored) | 13
@@ -38,8 +38,6 @@ Advanced options:
    ${toggle_better_hw_compatibility}
                            enable encoder to produce streams with better
                            hardware decoder compatibility
    ${toggle_vp8}           VP8 codec support
    ${toggle_vp9}           VP9 codec support
    ${toggle_vp10}          VP10 codec support
    ${toggle_internal_stats} output of encoder internal stats for debug, if supported (encoders)
    ${toggle_postproc}      postprocessing
@@ -191,13 +189,8 @@ if [ ${doxy_major:-0} -ge 1 ]; then
fi

# disable codecs when their source directory does not exist
[ -d "${source_path}/vp8" ] || disable_codec vp8
[ -d "${source_path}/vp9" ] || disable_codec vp9
[ -d "${source_path}/vp10" ] || disable_codec vp10

# disable vp10 codec by default
disable_codec vp10

# install everything except the sources, by default. sources will have
# to be enabled when doing dist builds, since that's no longer a common
# case.
@@ -214,16 +207,10 @@ enable_feature os_support
enable_feature temporal_denoising

CODECS="
    vp8_encoder
    vp8_decoder
    vp9_encoder
    vp9_decoder
    vp10_encoder
    vp10_decoder
"
CODEC_FAMILIES="
    vp8
    vp9
    vp10
"

@@ -248,10 +248,12 @@ endif
ifeq ($(CONFIG_OS_SUPPORT), yes)
  CODEC_EXTRA_LIBS-$(CONFIG_VP8)  += m
  CODEC_EXTRA_LIBS-$(CONFIG_VP9)  += m
  CODEC_EXTRA_LIBS-$(CONFIG_VP10) += m
else
    ifeq ($(CONFIG_GCC), yes)
    CODEC_EXTRA_LIBS-$(CONFIG_VP8)  += m
    CODEC_EXTRA_LIBS-$(CONFIG_VP9)  += m
    CODEC_EXTRA_LIBS-$(CONFIG_VP10) += m
    endif
endif
#
libs.mk | 56
@@ -53,62 +53,6 @@ CODEC_SRCS-yes += $(addprefix vpx_dsp/,$(call enabled,DSP_SRCS))
include $(SRC_PATH_BARE)/vpx_util/vpx_util.mk
CODEC_SRCS-yes += $(addprefix vpx_util/,$(call enabled,UTIL_SRCS))

ifeq ($(CONFIG_VP8),yes)
  VP8_PREFIX=vp8/
  include $(SRC_PATH_BARE)/$(VP8_PREFIX)vp8_common.mk
endif

ifeq ($(CONFIG_VP8_ENCODER),yes)
  include $(SRC_PATH_BARE)/$(VP8_PREFIX)vp8cx.mk
  CODEC_SRCS-yes += $(addprefix $(VP8_PREFIX),$(call enabled,VP8_CX_SRCS))
  CODEC_EXPORTS-yes += $(addprefix $(VP8_PREFIX),$(VP8_CX_EXPORTS))
  INSTALL-LIBS-yes += include/vpx/vp8.h include/vpx/vp8cx.h
  INSTALL_MAPS += include/vpx/% $(SRC_PATH_BARE)/$(VP8_PREFIX)/%
  CODEC_DOC_SECTIONS += vp8 vp8_encoder
endif

ifeq ($(CONFIG_VP8_DECODER),yes)
  include $(SRC_PATH_BARE)/$(VP8_PREFIX)vp8dx.mk
  CODEC_SRCS-yes += $(addprefix $(VP8_PREFIX),$(call enabled,VP8_DX_SRCS))
  CODEC_EXPORTS-yes += $(addprefix $(VP8_PREFIX),$(VP8_DX_EXPORTS))
  INSTALL-LIBS-yes += include/vpx/vp8.h include/vpx/vp8dx.h
  INSTALL_MAPS += include/vpx/% $(SRC_PATH_BARE)/$(VP8_PREFIX)/%
  CODEC_DOC_SECTIONS += vp8 vp8_decoder
endif

ifeq ($(CONFIG_VP9),yes)
  VP9_PREFIX=vp9/
  include $(SRC_PATH_BARE)/$(VP9_PREFIX)vp9_common.mk
endif

ifeq ($(CONFIG_VP9_ENCODER),yes)
  VP9_PREFIX=vp9/
  include $(SRC_PATH_BARE)/$(VP9_PREFIX)vp9cx.mk
  CODEC_SRCS-yes += $(addprefix $(VP9_PREFIX),$(call enabled,VP9_CX_SRCS))
  CODEC_EXPORTS-yes += $(addprefix $(VP9_PREFIX),$(VP9_CX_EXPORTS))
  CODEC_SRCS-yes += $(VP9_PREFIX)vp9cx.mk vpx/vp8.h vpx/vp8cx.h
  INSTALL-LIBS-yes += include/vpx/vp8.h include/vpx/vp8cx.h
  INSTALL-LIBS-$(CONFIG_SPATIAL_SVC) += include/vpx/svc_context.h
  INSTALL_MAPS += include/vpx/% $(SRC_PATH_BARE)/$(VP9_PREFIX)/%
  CODEC_DOC_SRCS += vpx/vp8.h vpx/vp8cx.h
  CODEC_DOC_SECTIONS += vp9 vp9_encoder
endif

ifeq ($(CONFIG_VP9_DECODER),yes)
  VP9_PREFIX=vp9/
  include $(SRC_PATH_BARE)/$(VP9_PREFIX)vp9dx.mk
  CODEC_SRCS-yes += $(addprefix $(VP9_PREFIX),$(call enabled,VP9_DX_SRCS))
  CODEC_EXPORTS-yes += $(addprefix $(VP9_PREFIX),$(VP9_DX_EXPORTS))
  CODEC_SRCS-yes += $(VP9_PREFIX)vp9dx.mk vpx/vp8.h vpx/vp8dx.h
  INSTALL-LIBS-yes += include/vpx/vp8.h include/vpx/vp8dx.h
  INSTALL_MAPS += include/vpx/% $(SRC_PATH_BARE)/$(VP9_PREFIX)/%
  CODEC_DOC_SRCS += vpx/vp8.h vpx/vp8dx.h
  CODEC_DOC_SECTIONS += vp9 vp9_decoder
endif

VP9_PREFIX=vp9/
$(BUILD_PFX)$(VP9_PREFIX)%.c.o: CFLAGS += -Wextra

# VP10 make file
ifeq ($(CONFIG_VP10),yes)
  VP10_PREFIX=vp10/
@@ -15,9 +15,6 @@
#include "third_party/googletest/src/include/gtest/gtest.h"

#include "./vpx_config.h"
#if CONFIG_VP9_ENCODER
#include "./vp9_rtcd.h"
#endif

#include "test/acm_random.h"
#include "test/clear_system_state.h"
@@ -15,9 +15,6 @@
#include "third_party/googletest/src/include/gtest/gtest.h"

#include "./vpx_config.h"
#if CONFIG_VP9_ENCODER
#include "./vp9_rtcd.h"
#endif

#include "test/acm_random.h"
#include "test/clear_system_state.h"
@@ -132,78 +129,10 @@ class ConsistencyTestBase : public ::testing::Test {
  ACMRandom rnd_;
};

#if CONFIG_VP9_ENCODER
typedef std::tr1::tuple<int, int> ConsistencyParam;
class ConsistencyVP9Test
    : public ConsistencyTestBase,
      public ::testing::WithParamInterface<ConsistencyParam> {
 public:
  ConsistencyVP9Test() : ConsistencyTestBase(GET_PARAM(0), GET_PARAM(1)) {}

 protected:
  double CheckConsistency(int frame) {
    EXPECT_LT(frame, 2) << "Frame to check has to be less than 2.";
    return
        vpx_get_ssim_metrics(source_data_[frame], source_stride_,
                             reference_data_[frame], reference_stride_,
                             width_, height_, ssim_array_, &metrics_, 1);
  }
};
#endif  // CONFIG_VP9_ENCODER

uint8_t* ConsistencyTestBase::source_data_[2] = {NULL, NULL};
uint8_t* ConsistencyTestBase::reference_data_[2] = {NULL, NULL};
Ssimv* ConsistencyTestBase::ssim_array_ = NULL;

#if CONFIG_VP9_ENCODER
TEST_P(ConsistencyVP9Test, ConsistencyIsZero) {
  FillRandom(source_data_[0], source_stride_);
  Copy(source_data_[1], source_data_[0]);
  Copy(reference_data_[0], source_data_[0]);
  Blur(reference_data_[0], reference_stride_, 3);
  Copy(reference_data_[1], source_data_[0]);
  Blur(reference_data_[1], reference_stride_, 3);

  double inconsistency = CheckConsistency(1);
  inconsistency = CheckConsistency(0);
  EXPECT_EQ(inconsistency, 0.0)
      << "Should have 0 inconsistency if they are exactly the same.";

  // If sources are not consistent reference frames inconsistency should
  // be less than if the source is consistent.
  FillRandom(source_data_[0], source_stride_);
  FillRandom(source_data_[1], source_stride_);
  FillRandom(reference_data_[0], reference_stride_);
  FillRandom(reference_data_[1], reference_stride_);
  CheckConsistency(0);
  inconsistency = CheckConsistency(1);

  Copy(source_data_[1], source_data_[0]);
  CheckConsistency(0);
  double inconsistency2 = CheckConsistency(1);
  EXPECT_LT(inconsistency, inconsistency2)
      << "Should have less inconsistency if source itself is inconsistent.";

  // Less of a blur should be less inconsistent than more blur coming off a
  // a frame with no blur.
  ClearSsim();
  FillRandom(source_data_[0], source_stride_);
  Copy(source_data_[1], source_data_[0]);
  Copy(reference_data_[0], source_data_[0]);
  Copy(reference_data_[1], source_data_[0]);
  Blur(reference_data_[1], reference_stride_, 4);
  CheckConsistency(0);
  inconsistency = CheckConsistency(1);
  ClearSsim();
  Copy(reference_data_[1], source_data_[0]);
  Blur(reference_data_[1], reference_stride_, 8);
  CheckConsistency(0);
  inconsistency2 = CheckConsistency(1);

  EXPECT_LT(inconsistency, inconsistency2)
      << "Stronger Blur should produce more inconsistency.";
}
#endif  // CONFIG_VP9_ENCODER

using std::tr1::make_tuple;
@@ -211,14 +140,4 @@ using std::tr1::make_tuple;
//------------------------------------------------------------------------------
// C functions

#if CONFIG_VP9_ENCODER
const ConsistencyParam c_vp9_tests[] = {
  make_tuple(320, 240),
  make_tuple(318, 242),
  make_tuple(318, 238),
};
INSTANTIATE_TEST_CASE_P(C, ConsistencyVP9Test,
                        ::testing::ValuesIn(c_vp9_tests));
#endif

}  // namespace
@@ -13,14 +13,11 @@
#include "third_party/googletest/src/include/gtest/gtest.h"

#include "./vpx_config.h"
#include "./vp9_rtcd.h"
#include "./vpx_dsp_rtcd.h"
#include "test/acm_random.h"
#include "test/clear_system_state.h"
#include "test/register_state_check.h"
#include "test/util.h"
#include "vp9/common/vp9_common.h"
#include "vp9/common/vp9_filter.h"
#include "vpx_dsp/vpx_dsp_common.h"
#include "vpx_dsp/vpx_filter.h"
#include "vpx_mem/vpx_mem.h"
@@ -14,14 +14,11 @@

#include "third_party/googletest/src/include/gtest/gtest.h"

#include "./vp9_rtcd.h"
#include "./vpx_dsp_rtcd.h"
#include "test/acm_random.h"
#include "test/clear_system_state.h"
#include "test/register_state_check.h"
#include "test/util.h"
#include "vp9/common/vp9_entropy.h"
#include "vp9/common/vp9_scan.h"
#include "vpx/vpx_codec.h"
#include "vpx/vpx_integer.h"
#include "vpx_ports/mem.h"
@@ -14,14 +14,12 @@

#include "third_party/googletest/src/include/gtest/gtest.h"

#include "./vp9_rtcd.h"
#include "./vpx_config.h"
#include "./vpx_dsp_rtcd.h"
#include "test/acm_random.h"
#include "test/clear_system_state.h"
#include "test/register_state_check.h"
#include "test/util.h"
#include "vp9/common/vp9_entropy.h"
#include "vpx/vpx_codec.h"
#include "vpx/vpx_integer.h"
#include "vpx_ports/mem.h"
@@ -20,12 +20,6 @@ namespace {

TEST(EncodeAPI, InvalidParams) {
  static const vpx_codec_iface_t *kCodecs[] = {
#if CONFIG_VP8_ENCODER
    &vpx_codec_vp8_cx_algo,
#endif
#if CONFIG_VP9_ENCODER
    &vpx_codec_vp9_cx_algo,
#endif
#if CONFIG_VP10_ENCODER
    &vpx_codec_vp10_cx_algo,
#endif
@@ -14,7 +14,6 @@

#include "third_party/googletest/src/include/gtest/gtest.h"

#include "./vp9_rtcd.h"
#include "./vpx_dsp_rtcd.h"
#include "test/acm_random.h"
#include "test/clear_system_state.h"
@@ -14,14 +14,11 @@

#include "third_party/googletest/src/include/gtest/gtest.h"

#include "./vp9_rtcd.h"
#include "./vpx_dsp_rtcd.h"
#include "test/acm_random.h"
#include "test/clear_system_state.h"
#include "test/register_state_check.h"
#include "test/util.h"
#include "vp9/common/vp9_entropy.h"
#include "vp9/common/vp9_scan.h"
#include "vpx/vpx_codec.h"
#include "vpx/vpx_integer.h"
#include "vpx_ports/mem.h"
@@ -9,7 +9,6 @@
 */

#include "./vpx_config.h"
#include "./vp8_rtcd.h"

#include "third_party/googletest/src/include/gtest/gtest.h"

@@ -14,14 +14,11 @@

#include "third_party/googletest/src/include/gtest/gtest.h"

#include "./vp9_rtcd.h"
#include "./vpx_dsp_rtcd.h"
#include "test/acm_random.h"
#include "test/clear_system_state.h"
#include "test/register_state_check.h"
#include "test/util.h"
#include "vp9/common/vp9_blockd.h"
#include "vp9/common/vp9_scan.h"
#include "vpx/vpx_integer.h"

using libvpx_test::ACMRandom;
@@ -13,7 +13,6 @@
#include "third_party/googletest/src/include/gtest/gtest.h"

#include "./vpx_config.h"
#include "./vp8_rtcd.h"
#include "test/acm_random.h"
#include "test/clear_system_state.h"
#include "test/register_state_check.h"
@@ -15,7 +15,6 @@
#include "third_party/googletest/src/include/gtest/gtest.h"

#include "./vpx_config.h"
#include "./vp8_rtcd.h"
#include "test/acm_random.h"
#include "test/clear_system_state.h"
#include "test/register_state_check.h"
@@ -10,6 +10,7 @@ LIBVPX_TEST_SRCS-yes += test_vectors.h
LIBVPX_TEST_SRCS-yes += util.h
LIBVPX_TEST_SRCS-yes += video_source.h
LIBVPX_TEST_SRCS-yes += transform_test_base.h
LIBVPX_TEST_SRCS-yes += function_equivalence_test.h

##
## BLACK BOX TESTS
@@ -142,7 +143,7 @@ LIBVPX_TEST_SRCS-yes += vp9_boolcoder_test.cc
LIBVPX_TEST_SRCS-yes += vp9_encoder_parms_get_to_decoder.cc
endif

LIBVPX_TEST_SRCS-yes += convolve_test.cc
#LIBVPX_TEST_SRCS-yes += convolve_test.cc
LIBVPX_TEST_SRCS-yes += lpf_8_test.cc
LIBVPX_TEST_SRCS-yes += vp9_intrapred_test.cc
LIBVPX_TEST_SRCS-$(CONFIG_VP9_DECODER) += vp9_decrypt_test.cc
@@ -173,7 +174,7 @@ endif # VP9

## VP10
ifeq ($(CONFIG_VP10),yes)
LIBVPX_TEST_SRCS-yes += vp10_inv_txfm_test.cc
LIBVPX_TEST_SRCS-$(CONFIG_VP10_ENCODER) += vp10_inv_txfm_test.cc
LIBVPX_TEST_SRCS-$(CONFIG_VP10_ENCODER) += vp10_dct_test.cc
LIBVPX_TEST_SRCS-$(CONFIG_VP10_ENCODER) += vp10_fht4x4_test.cc
LIBVPX_TEST_SRCS-$(CONFIG_VP10_ENCODER) += vp10_fht8x8_test.cc
@@ -12,7 +12,7 @@ using libvpx_test::ACMRandom;

namespace {
void setup_convolve() {
#if HAVE_SSSE3
#if HAVE_SSSE3 && CONFIG_RUNTIME_CPU_DETECT
  vp10_convolve_horiz = vp10_convolve_horiz_c;
  vp10_convolve_vert = vp10_convolve_vert_c;
#endif
@@ -289,31 +289,31 @@ using std::tr1::make_tuple;
INSTANTIATE_TEST_CASE_P(
    C, Vp10PartialIDctTest,
    ::testing::Values(
        make_tuple(&vpx_fdct32x32_c,
        make_tuple(&vp10_fdct32x32_c,
                   &vp10_idct32x32_1024_add_c,
                   &vp10_idct32x32_34_add_c,
                   TX_32X32, 34),
        make_tuple(&vpx_fdct32x32_c,
        make_tuple(&vp10_fdct32x32_c,
                   &vp10_idct32x32_1024_add_c,
                   &vp10_idct32x32_1_add_c,
                   TX_32X32, 1),
        make_tuple(&vpx_fdct16x16_c,
        make_tuple(&vp10_fdct16x16_c,
                   &vp10_idct16x16_256_add_c,
                   &vp10_idct16x16_10_add_c,
                   TX_16X16, 10),
        make_tuple(&vpx_fdct16x16_c,
        make_tuple(&vp10_fdct16x16_c,
                   &vp10_idct16x16_256_add_c,
                   &vp10_idct16x16_1_add_c,
                   TX_16X16, 1),
        make_tuple(&vpx_fdct8x8_c,
        make_tuple(&vp10_fdct8x8_c,
                   &vp10_idct8x8_64_add_c,
                   &vp10_idct8x8_12_add_c,
                   TX_8X8, 12),
        make_tuple(&vpx_fdct8x8_c,
        make_tuple(&vp10_fdct8x8_c,
                   &vp10_idct8x8_64_add_c,
                   &vp10_idct8x8_1_add_c,
                   TX_8X8, 1),
        make_tuple(&vpx_fdct4x4_c,
        make_tuple(&vp10_fdct4x4_c,
                   &vp10_idct4x4_16_add_c,
                   &vp10_idct4x4_1_add_c,
                   TX_4X4, 1)));
@@ -17,7 +17,6 @@

#include "third_party/googletest/src/include/gtest/gtest.h"

#include "./vp8_rtcd.h"
#include "test/acm_random.h"
#include "vpx/vpx_integer.h"

@@ -15,12 +15,10 @@
#include "third_party/googletest/src/include/gtest/gtest.h"

#include "./vpx_config.h"
#include "./vp9_rtcd.h"
#include "test/acm_random.h"
#include "test/clear_system_state.h"
#include "test/register_state_check.h"
#include "test/util.h"
#include "vp9/common/vp9_entropy.h"
#include "vpx/vpx_codec.h"
#include "vpx/vpx_integer.h"

@@ -16,11 +16,11 @@

#include "./tools_common.h"

#if CONFIG_VP8_ENCODER || CONFIG_VP9_ENCODER || CONFIG_VP10_ENCODER
#if CONFIG_VP10_ENCODER
#include "vpx/vp8cx.h"
#endif

#if CONFIG_VP8_DECODER || CONFIG_VP9_DECODER || CONFIG_VP10_DECODER
#if CONFIG_VP10_DECODER
#include "vpx/vp8dx.h"
#endif

@@ -136,14 +136,6 @@ static const VpxInterface vpx_encoders[] = {
#if CONFIG_VP10_ENCODER
  {"vp10", VP10_FOURCC, &vpx_codec_vp10_cx},
#endif

#if CONFIG_VP8_ENCODER
  {"vp8", VP8_FOURCC, &vpx_codec_vp8_cx},
#endif

#if CONFIG_VP9_ENCODER
  {"vp9", VP9_FOURCC, &vpx_codec_vp9_cx},
#endif
};

int get_vpx_encoder_count(void) {
@@ -171,13 +163,6 @@ const VpxInterface *get_vpx_encoder_by_name(const char *name) {
#if CONFIG_DECODERS

static const VpxInterface vpx_decoders[] = {
#if CONFIG_VP8_DECODER
  {"vp8", VP8_FOURCC, &vpx_codec_vp8_dx},
#endif

#if CONFIG_VP9_DECODER
  {"vp9", VP9_FOURCC, &vpx_codec_vp9_dx},
#endif

#if CONFIG_VP10_DECODER
  {"vp10", VP10_FOURCC, &vpx_codec_vp10_dx},
@@ -10,10 +10,12 @@

VP10_COMMON_SRCS-yes += vp10_common.mk
VP10_COMMON_SRCS-yes += vp10_iface_common.h
VP10_COMMON_SRCS-yes += common/ans.h
VP10_COMMON_SRCS-yes += common/ppflags.h
VP10_COMMON_SRCS-yes += common/alloccommon.c
VP10_COMMON_SRCS-yes += common/blockd.c
VP10_COMMON_SRCS-yes += common/debugmodes.c
VP10_COMMON_SRCS-yes += common/divide.h
VP10_COMMON_SRCS-yes += common/entropy.c
VP10_COMMON_SRCS-yes += common/entropymode.c
VP10_COMMON_SRCS-yes += common/entropymv.c
@@ -57,6 +59,7 @@ VP10_COMMON_SRCS-yes += common/mvref_common.h
VP10_COMMON_SRCS-yes += common/quant_common.c
VP10_COMMON_SRCS-yes += common/reconinter.c
VP10_COMMON_SRCS-yes += common/reconintra.c
VP10_COMMON_SRCS-yes += common/restoration.h
VP10_COMMON_SRCS-$(CONFIG_POSTPROC_VISUALIZER) += common/textblit.c
VP10_COMMON_SRCS-yes += common/common_data.h
VP10_COMMON_SRCS-yes += common/scan.c
@@ -1,190 +0,0 @@
|
||||
/*
|
||||
* Copyright (c) 2010 The WebM project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
|
||||
#include "vpx_config.h"
|
||||
#include "alloccommon.h"
|
||||
#include "blockd.h"
|
||||
#include "vpx_mem/vpx_mem.h"
|
||||
#include "onyxc_int.h"
|
||||
#include "findnearmv.h"
|
||||
#include "entropymode.h"
|
||||
#include "systemdependent.h"
|
||||
|
||||
void vp8_de_alloc_frame_buffers(VP8_COMMON *oci)
|
||||
{
|
||||
int i;
|
||||
for (i = 0; i < NUM_YV12_BUFFERS; i++)
|
||||
vp8_yv12_de_alloc_frame_buffer(&oci->yv12_fb[i]);
|
||||
|
||||
vp8_yv12_de_alloc_frame_buffer(&oci->temp_scale_frame);
|
||||
#if CONFIG_POSTPROC
|
||||
vp8_yv12_de_alloc_frame_buffer(&oci->post_proc_buffer);
|
||||
if (oci->post_proc_buffer_int_used)
|
||||
vp8_yv12_de_alloc_frame_buffer(&oci->post_proc_buffer_int);
|
||||
|
||||
vpx_free(oci->pp_limits_buffer);
|
||||
oci->pp_limits_buffer = NULL;
|
||||
#endif
|
||||
|
||||
vpx_free(oci->above_context);
|
||||
vpx_free(oci->mip);
|
||||
#if CONFIG_ERROR_CONCEALMENT
|
||||
vpx_free(oci->prev_mip);
|
||||
oci->prev_mip = NULL;
|
||||
#endif
|
||||
|
||||
oci->above_context = NULL;
|
||||
oci->mip = NULL;
|
||||
}
|
||||
|
||||
int vp8_alloc_frame_buffers(VP8_COMMON *oci, int width, int height)
|
||||
{
|
||||
int i;
|
||||
|
||||
vp8_de_alloc_frame_buffers(oci);
|
||||
|
||||
/* our internal buffers are always multiples of 16 */
|
||||
if ((width & 0xf) != 0)
|
||||
width += 16 - (width & 0xf);
|
||||
|
||||
if ((height & 0xf) != 0)
|
||||
height += 16 - (height & 0xf);
|
||||
|
||||
|
||||
for (i = 0; i < NUM_YV12_BUFFERS; i++)
|
||||
{
|
||||
oci->fb_idx_ref_cnt[i] = 0;
|
||||
oci->yv12_fb[i].flags = 0;
|
||||
if (vp8_yv12_alloc_frame_buffer(&oci->yv12_fb[i], width, height, VP8BORDERINPIXELS) < 0)
|
||||
goto allocation_fail;
|
||||
}
|
||||
|
||||
oci->new_fb_idx = 0;
|
||||
oci->lst_fb_idx = 1;
|
||||
oci->gld_fb_idx = 2;
|
||||
oci->alt_fb_idx = 3;
|
||||
|
||||
oci->fb_idx_ref_cnt[0] = 1;
|
||||
oci->fb_idx_ref_cnt[1] = 1;
|
||||
oci->fb_idx_ref_cnt[2] = 1;
|
||||
oci->fb_idx_ref_cnt[3] = 1;
|
||||
|
||||
if (vp8_yv12_alloc_frame_buffer(&oci->temp_scale_frame, width, 16, VP8BORDERINPIXELS) < 0)
|
||||
goto allocation_fail;
|
||||
|
||||
oci->mb_rows = height >> 4;
|
||||
oci->mb_cols = width >> 4;
|
||||
oci->MBs = oci->mb_rows * oci->mb_cols;
|
||||
oci->mode_info_stride = oci->mb_cols + 1;
|
||||
oci->mip = vpx_calloc((oci->mb_cols + 1) * (oci->mb_rows + 1), sizeof(MODE_INFO));
|
||||
|
||||
if (!oci->mip)
|
||||
goto allocation_fail;
|
||||
|
||||
oci->mi = oci->mip + oci->mode_info_stride + 1;
|
||||
|
||||
/* Allocation of previous mode info will be done in vp8_decode_frame()
|
||||
* as it is a decoder only data */
|
||||
|
||||
oci->above_context = vpx_calloc(sizeof(ENTROPY_CONTEXT_PLANES) * oci->mb_cols, 1);
|
||||
|
||||
if (!oci->above_context)
|
||||
goto allocation_fail;
|
||||
|
||||
#if CONFIG_POSTPROC
|
||||
if (vp8_yv12_alloc_frame_buffer(&oci->post_proc_buffer, width, height, VP8BORDERINPIXELS) < 0)
|
||||
goto allocation_fail;
|
||||
|
||||
oci->post_proc_buffer_int_used = 0;
|
||||
memset(&oci->postproc_state, 0, sizeof(oci->postproc_state));
|
||||
memset(oci->post_proc_buffer.buffer_alloc, 128,
|
||||
oci->post_proc_buffer.frame_size);
|
||||
|
||||
/* Allocate buffer to store post-processing filter coefficients.
|
||||
*
|
||||
* Note: Round up mb_cols to support SIMD reads
|
||||
*/
|
||||
oci->pp_limits_buffer = vpx_memalign(16, 24 * ((oci->mb_cols + 1) & ~1));
|
||||
if (!oci->pp_limits_buffer)
|
||||
goto allocation_fail;
|
||||
#endif
|
||||
|
||||
return 0;
|
||||
|
||||
allocation_fail:
|
||||
vp8_de_alloc_frame_buffers(oci);
|
||||
return 1;
|
||||
}
|
||||
|
||||
void vp8_setup_version(VP8_COMMON *cm)
|
||||
{
|
||||
switch (cm->version)
|
||||
{
|
||||
case 0:
|
||||
cm->no_lpf = 0;
|
||||
cm->filter_type = NORMAL_LOOPFILTER;
|
||||
cm->use_bilinear_mc_filter = 0;
|
||||
cm->full_pixel = 0;
|
||||
break;
|
||||
case 1:
|
||||
cm->no_lpf = 0;
|
||||
cm->filter_type = SIMPLE_LOOPFILTER;
|
||||
cm->use_bilinear_mc_filter = 1;
|
||||
cm->full_pixel = 0;
|
||||
break;
|
||||
case 2:
|
||||
cm->no_lpf = 1;
|
||||
cm->filter_type = NORMAL_LOOPFILTER;
|
||||
cm->use_bilinear_mc_filter = 1;
|
||||
cm->full_pixel = 0;
|
||||
break;
|
||||
case 3:
|
||||
cm->no_lpf = 1;
|
||||
cm->filter_type = SIMPLE_LOOPFILTER;
|
||||
cm->use_bilinear_mc_filter = 1;
|
||||
cm->full_pixel = 1;
|
||||
break;
|
||||
default:
|
||||
/*4,5,6,7 are reserved for future use*/
|
||||
cm->no_lpf = 0;
|
||||
cm->filter_type = NORMAL_LOOPFILTER;
|
||||
cm->use_bilinear_mc_filter = 0;
|
||||
cm->full_pixel = 0;
|
||||
break;
|
||||
}
|
||||
}
|
||||
void vp8_create_common(VP8_COMMON *oci)
|
||||
{
|
||||
vp8_machine_specific_config(oci);
|
||||
|
||||
vp8_init_mbmode_probs(oci);
|
||||
vp8_default_bmode_probs(oci->fc.bmode_prob);
|
||||
|
||||
oci->mb_no_coeff_skip = 1;
|
||||
oci->no_lpf = 0;
|
||||
oci->filter_type = NORMAL_LOOPFILTER;
|
||||
oci->use_bilinear_mc_filter = 0;
|
||||
oci->full_pixel = 0;
|
||||
oci->multi_token_partition = ONE_PARTITION;
|
||||
oci->clamp_type = RECON_CLAMP_REQUIRED;
|
||||
|
||||
/* Initialize reference frame sign bias structure to defaults */
|
||||
memset(oci->ref_frame_sign_bias, 0, sizeof(oci->ref_frame_sign_bias));
|
||||
|
||||
/* Default disable buffer to buffer copying */
|
||||
oci->copy_buffer_to_gf = 0;
|
||||
oci->copy_buffer_to_arf = 0;
|
||||
}
|
||||
|
||||
void vp8_remove_common(VP8_COMMON *oci)
|
||||
{
|
||||
vp8_de_alloc_frame_buffers(oci);
|
||||
}
|
||||
@@ -1,31 +0,0 @@
|
||||
/*
|
||||
* Copyright (c) 2010 The WebM project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
|
||||
#ifndef VP8_COMMON_ALLOCCOMMON_H_
|
||||
#define VP8_COMMON_ALLOCCOMMON_H_
|
||||
|
||||
#include "onyxc_int.h"
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
void vp8_create_common(VP8_COMMON *oci);
|
||||
void vp8_remove_common(VP8_COMMON *oci);
|
||||
void vp8_de_alloc_frame_buffers(VP8_COMMON *oci);
|
||||
int vp8_alloc_frame_buffers(VP8_COMMON *oci, int width, int height);
|
||||
void vp8_setup_version(VP8_COMMON *oci);
|
||||
|
||||
#ifdef __cplusplus
|
||||
} // extern "C"
|
||||
#endif
|
||||
|
||||
#endif // VP8_COMMON_ALLOCCOMMON_H_
|
||||
@@ -1,237 +0,0 @@
|
||||
;
|
||||
; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
|
||||
;
|
||||
; Use of this source code is governed by a BSD-style license
|
||||
; that can be found in the LICENSE file in the root of the source
|
||||
; tree. An additional intellectual property rights grant can be found
|
||||
; in the file PATENTS. All contributing project authors may
|
||||
; be found in the AUTHORS file in the root of the source tree.
|
||||
;
|
||||
|
||||
|
||||
EXPORT |vp8_filter_block2d_bil_first_pass_armv6|
|
||||
EXPORT |vp8_filter_block2d_bil_second_pass_armv6|
|
||||
|
||||
AREA |.text|, CODE, READONLY ; name this block of code
|
||||
|
||||
;-------------------------------------
|
||||
; r0 unsigned char *src_ptr,
|
||||
; r1 unsigned short *dst_ptr,
|
||||
; r2 unsigned int src_pitch,
|
||||
; r3 unsigned int height,
|
||||
; stack unsigned int width,
|
||||
; stack const short *vp8_filter
|
||||
;-------------------------------------
|
||||
; The output is transposed stroed in output array to make it easy for second pass filtering.
|
||||
|vp8_filter_block2d_bil_first_pass_armv6| PROC
|
||||
stmdb sp!, {r4 - r11, lr}
|
||||
|
||||
ldr r11, [sp, #40] ; vp8_filter address
|
||||
ldr r4, [sp, #36] ; width
|
||||
|
||||
mov r12, r3 ; outer-loop counter
|
||||
|
||||
add r7, r2, r4 ; preload next row
|
||||
pld [r0, r7]
|
||||
|
||||
sub r2, r2, r4 ; src increment for height loop
|
||||
|
||||
ldr r5, [r11] ; load up filter coefficients
|
||||
|
||||
mov r3, r3, lsl #1 ; height*2
|
||||
add r3, r3, #2 ; plus 2 to make output buffer 4-bit aligned since height is actually (height+1)
|
||||
|
||||
mov r11, r1 ; save dst_ptr for each row
|
||||
|
||||
cmp r5, #128 ; if filter coef = 128, then skip the filter
|
||||
beq bil_null_1st_filter
|
||||
|
||||
|bil_height_loop_1st_v6|
|
||||
ldrb r6, [r0] ; load source data
|
||||
ldrb r7, [r0, #1]
|
||||
ldrb r8, [r0, #2]
|
||||
mov lr, r4, lsr #2 ; 4-in-parellel loop counter
|
||||
|
||||
|bil_width_loop_1st_v6|
|
||||
ldrb r9, [r0, #3]
|
||||
ldrb r10, [r0, #4]
|
||||
|
||||
pkhbt r6, r6, r7, lsl #16 ; src[1] | src[0]
|
||||
pkhbt r7, r7, r8, lsl #16 ; src[2] | src[1]
|
||||
|
||||
smuad r6, r6, r5 ; apply the filter
|
||||
pkhbt r8, r8, r9, lsl #16 ; src[3] | src[2]
|
||||
smuad r7, r7, r5
|
||||
pkhbt r9, r9, r10, lsl #16 ; src[4] | src[3]
|
||||
|
||||
smuad r8, r8, r5
|
||||
smuad r9, r9, r5
|
||||
|
||||
add r0, r0, #4
|
||||
subs lr, lr, #1
|
||||
|
||||
add r6, r6, #0x40 ; round_shift_and_clamp
|
||||
add r7, r7, #0x40
|
||||
usat r6, #16, r6, asr #7
|
||||
usat r7, #16, r7, asr #7
|
||||
|
||||
strh r6, [r1], r3 ; result is transposed and stored
|
||||
|
||||
add r8, r8, #0x40 ; round_shift_and_clamp
|
||||
strh r7, [r1], r3
|
||||
add r9, r9, #0x40
|
||||
usat r8, #16, r8, asr #7
|
||||
usat r9, #16, r9, asr #7
|
||||
|
||||
strh r8, [r1], r3 ; result is transposed and stored
|
||||
|
||||
ldrneb r6, [r0] ; load source data
|
||||
strh r9, [r1], r3
|
||||
|
||||
ldrneb r7, [r0, #1]
|
||||
ldrneb r8, [r0, #2]
|
||||
|
||||
bne bil_width_loop_1st_v6
|
||||
|
||||
add r0, r0, r2 ; move to next input row
|
||||
subs r12, r12, #1
|
||||
|
||||
add r9, r2, r4, lsl #1 ; adding back block width
|
||||
pld [r0, r9] ; preload next row
|
||||
|
||||
add r11, r11, #2 ; move over to next column
|
||||
mov r1, r11
|
||||
|
||||
bne bil_height_loop_1st_v6
|
||||
|
||||
ldmia sp!, {r4 - r11, pc}
|
||||
|
||||
|bil_null_1st_filter|
|
||||
|bil_height_loop_null_1st|
|
||||
mov lr, r4, lsr #2 ; loop counter
|
||||
|
||||
|bil_width_loop_null_1st|
|
||||
ldrb r6, [r0] ; load data
|
||||
ldrb r7, [r0, #1]
|
||||
ldrb r8, [r0, #2]
|
||||
ldrb r9, [r0, #3]
|
||||
|
||||
strh r6, [r1], r3 ; store it to immediate buffer
|
||||
add r0, r0, #4
|
||||
strh r7, [r1], r3
|
||||
subs lr, lr, #1
|
||||
strh r8, [r1], r3
|
||||
strh r9, [r1], r3
|
||||
|
||||
bne bil_width_loop_null_1st
|
||||
|
||||
subs r12, r12, #1
|
||||
add r0, r0, r2 ; move to next input line
|
||||
add r11, r11, #2 ; move over to next column
|
||||
mov r1, r11
|
||||
|
||||
bne bil_height_loop_null_1st
|
||||
|
||||
ldmia sp!, {r4 - r11, pc}
|
||||
|
||||
ENDP ; |vp8_filter_block2d_bil_first_pass_armv6|
|
||||
|
||||
|
||||
;---------------------------------
|
||||
; r0 unsigned short *src_ptr,
|
||||
; r1 unsigned char *dst_ptr,
|
||||
; r2 int dst_pitch,
|
||||
; r3 unsigned int height,
|
||||
; stack unsigned int width,
|
||||
; stack const short *vp8_filter
|
||||
;---------------------------------
|
||||
|vp8_filter_block2d_bil_second_pass_armv6| PROC
|
||||
stmdb sp!, {r4 - r11, lr}
|
||||
|
||||
ldr r11, [sp, #40] ; vp8_filter address
|
||||
ldr r4, [sp, #36] ; width
|
||||
|
||||
ldr r5, [r11] ; load up filter coefficients
|
||||
mov r12, r4 ; outer-loop counter = width, since we work on transposed data matrix
|
||||
mov r11, r1
|
||||
|
||||
cmp r5, #128 ; if filter coef = 128, then skip the filter
|
||||
beq bil_null_2nd_filter
|
||||
|
||||
|bil_height_loop_2nd|
|
||||
ldr r6, [r0] ; load the data
|
||||
ldr r8, [r0, #4]
|
||||
ldrh r10, [r0, #8]
|
||||
mov lr, r3, lsr #2 ; loop counter
|
||||
|
||||
|bil_width_loop_2nd|
|
||||
pkhtb r7, r6, r8 ; src[1] | src[2]
|
||||
pkhtb r9, r8, r10 ; src[3] | src[4]
|
||||
|
||||
smuad r6, r6, r5 ; apply filter
|
||||
smuad r8, r8, r5 ; apply filter
|
||||
|
||||
subs lr, lr, #1
|
||||
|
||||
smuadx r7, r7, r5 ; apply filter
|
||||
smuadx r9, r9, r5 ; apply filter
|
||||
|
||||
add r0, r0, #8
|
||||
|
||||
add r6, r6, #0x40 ; round_shift_and_clamp
|
||||
add r7, r7, #0x40
|
||||
usat r6, #8, r6, asr #7
|
||||
usat r7, #8, r7, asr #7
|
||||
strb r6, [r1], r2 ; the result is transposed back and stored
|
||||
|
||||
add r8, r8, #0x40 ; round_shift_and_clamp
|
||||
strb r7, [r1], r2
|
||||
add r9, r9, #0x40
|
||||
usat r8, #8, r8, asr #7
|
||||
usat r9, #8, r9, asr #7
|
||||
strb r8, [r1], r2 ; the result is transposed back and stored
|
||||
|
||||
ldrne r6, [r0] ; load data
|
||||
strb r9, [r1], r2
|
||||
ldrne r8, [r0, #4]
|
||||
ldrneh r10, [r0, #8]
|
||||
|
||||
bne bil_width_loop_2nd
|
||||
|
||||
subs r12, r12, #1
|
||||
add r0, r0, #4 ; update src for next row
|
||||
add r11, r11, #1
|
||||
mov r1, r11
|
||||
|
||||
bne bil_height_loop_2nd
|
||||
ldmia sp!, {r4 - r11, pc}
|
||||
|
||||
|bil_null_2nd_filter|
|
||||
|bil_height_loop_null_2nd|
|
||||
mov lr, r3, lsr #2
|
||||
|
||||
|bil_width_loop_null_2nd|
|
||||
ldr r6, [r0], #4 ; load data
|
||||
subs lr, lr, #1
|
||||
ldr r8, [r0], #4
|
||||
|
||||
strb r6, [r1], r2 ; store data
|
||||
mov r7, r6, lsr #16
|
||||
strb r7, [r1], r2
|
||||
mov r9, r8, lsr #16
|
||||
strb r8, [r1], r2
|
||||
strb r9, [r1], r2
|
||||
|
||||
bne bil_width_loop_null_2nd
|
||||
|
||||
subs r12, r12, #1
|
||||
add r0, r0, #4
|
||||
add r11, r11, #1
|
||||
mov r1, r11
|
||||
|
||||
bne bil_height_loop_null_2nd
|
||||
|
||||
ldmia sp!, {r4 - r11, pc}
|
||||
ENDP ; |vp8_filter_block2d_second_pass_armv6|
|
||||
|
||||
END
|
||||
@@ -1,186 +0,0 @@
|
||||
;
|
||||
; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
|
||||
;
|
||||
; Use of this source code is governed by a BSD-style license
|
||||
; that can be found in the LICENSE file in the root of the source
|
||||
; tree. An additional intellectual property rights grant can be found
|
||||
; in the file PATENTS. All contributing project authors may
|
||||
; be found in the AUTHORS file in the root of the source tree.
|
||||
;
|
||||
|
||||
|
||||
EXPORT |vp8_copy_mem16x16_v6|
|
||||
; ARM
|
||||
; REQUIRE8
|
||||
; PRESERVE8
|
||||
|
||||
AREA Block, CODE, READONLY ; name this block of code
|
||||
;void copy_mem16x16_v6( unsigned char *src, int src_stride, unsigned char *dst, int dst_stride)
|
||||
;-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
|
||||
|vp8_copy_mem16x16_v6| PROC
|
||||
stmdb sp!, {r4 - r7}
|
||||
;push {r4-r7}
|
||||
|
||||
;preload
|
||||
pld [r0, #31] ; preload for next 16x16 block
|
||||
|
||||
ands r4, r0, #15
|
||||
beq copy_mem16x16_fast
|
||||
|
||||
ands r4, r0, #7
|
||||
beq copy_mem16x16_8
|
||||
|
||||
ands r4, r0, #3
|
||||
beq copy_mem16x16_4
|
||||
|
||||
;copy one byte each time
|
||||
ldrb r4, [r0]
|
||||
ldrb r5, [r0, #1]
|
||||
ldrb r6, [r0, #2]
|
||||
ldrb r7, [r0, #3]
|
||||
|
||||
mov r12, #16
|
||||
|
||||
copy_mem16x16_1_loop
|
||||
strb r4, [r2]
|
||||
strb r5, [r2, #1]
|
||||
strb r6, [r2, #2]
|
||||
strb r7, [r2, #3]
|
||||
|
||||
ldrb r4, [r0, #4]
|
||||
ldrb r5, [r0, #5]
|
||||
ldrb r6, [r0, #6]
|
||||
ldrb r7, [r0, #7]
|
||||
|
||||
subs r12, r12, #1
|
||||
|
||||
strb r4, [r2, #4]
|
||||
strb r5, [r2, #5]
|
||||
strb r6, [r2, #6]
|
||||
strb r7, [r2, #7]
|
||||
|
||||
ldrb r4, [r0, #8]
|
||||
ldrb r5, [r0, #9]
|
||||
ldrb r6, [r0, #10]
|
||||
ldrb r7, [r0, #11]
|
||||
|
||||
strb r4, [r2, #8]
|
||||
strb r5, [r2, #9]
|
||||
strb r6, [r2, #10]
|
||||
strb r7, [r2, #11]
|
||||
|
||||
ldrb r4, [r0, #12]
|
||||
ldrb r5, [r0, #13]
|
||||
ldrb r6, [r0, #14]
|
||||
ldrb r7, [r0, #15]
|
||||
|
||||
add r0, r0, r1
|
||||
|
||||
strb r4, [r2, #12]
|
||||
strb r5, [r2, #13]
|
||||
strb r6, [r2, #14]
|
||||
strb r7, [r2, #15]
|
||||
|
||||
add r2, r2, r3
|
||||
|
||||
ldrneb r4, [r0]
|
||||
ldrneb r5, [r0, #1]
|
||||
ldrneb r6, [r0, #2]
|
||||
ldrneb r7, [r0, #3]
|
||||
|
||||
pld [r0, #31] ; preload for next 16x16 block
|
||||
|
||||
bne copy_mem16x16_1_loop
|
||||
|
||||
ldmia sp!, {r4 - r7}
|
||||
;pop {r4-r7}
|
||||
mov pc, lr
|
||||
|
||||
;copy 4 bytes each time
|
||||
copy_mem16x16_4
|
||||
ldr r4, [r0]
|
||||
ldr r5, [r0, #4]
|
||||
ldr r6, [r0, #8]
|
||||
ldr r7, [r0, #12]
|
||||
|
||||
mov r12, #16
|
||||
|
||||
copy_mem16x16_4_loop
|
||||
subs r12, r12, #1
|
||||
add r0, r0, r1
|
||||
|
||||
str r4, [r2]
|
||||
str r5, [r2, #4]
|
||||
str r6, [r2, #8]
|
||||
str r7, [r2, #12]
|
||||
|
||||
add r2, r2, r3
|
||||
|
||||
ldrne r4, [r0]
|
||||
ldrne r5, [r0, #4]
|
||||
ldrne r6, [r0, #8]
|
||||
ldrne r7, [r0, #12]
|
||||
|
||||
pld [r0, #31] ; preload for next 16x16 block
|
||||
|
||||
bne copy_mem16x16_4_loop
|
||||
|
||||
ldmia sp!, {r4 - r7}
|
||||
;pop {r4-r7}
|
||||
mov pc, lr
|
||||
|
||||
;copy 8 bytes each time
|
||||
copy_mem16x16_8
|
||||
sub r1, r1, #16
|
||||
sub r3, r3, #16
|
||||
|
||||
mov r12, #16
|
||||
|
||||
copy_mem16x16_8_loop
|
||||
ldmia r0!, {r4-r5}
|
||||
;ldm r0, {r4-r5}
|
||||
ldmia r0!, {r6-r7}
|
||||
|
||||
add r0, r0, r1
|
||||
|
||||
stmia r2!, {r4-r5}
|
||||
subs r12, r12, #1
|
||||
;stm r2, {r4-r5}
|
||||
stmia r2!, {r6-r7}
|
||||
|
||||
add r2, r2, r3
|
||||
|
||||
pld [r0, #31] ; preload for next 16x16 block
|
||||
bne copy_mem16x16_8_loop
|
||||
|
||||
ldmia sp!, {r4 - r7}
|
||||
;pop {r4-r7}
|
||||
mov pc, lr
|
||||
|
||||
;copy 16 bytes each time
|
||||
copy_mem16x16_fast
|
||||
;sub r1, r1, #16
|
||||
;sub r3, r3, #16
|
||||
|
||||
mov r12, #16
|
||||
|
||||
copy_mem16x16_fast_loop
|
||||
ldmia r0, {r4-r7}
|
||||
;ldm r0, {r4-r7}
|
||||
add r0, r0, r1
|
||||
|
||||
subs r12, r12, #1
|
||||
stmia r2, {r4-r7}
|
||||
;stm r2, {r4-r7}
|
||||
add r2, r2, r3
|
||||
|
||||
pld [r0, #31] ; preload for next 16x16 block
|
||||
bne copy_mem16x16_fast_loop
|
||||
|
||||
ldmia sp!, {r4 - r7}
|
||||
;pop {r4-r7}
|
||||
mov pc, lr
|
||||
|
||||
ENDP ; |vp8_copy_mem16x16_v6|
|
||||
|
||||
END
|
||||
@@ -1,128 +0,0 @@
|
||||
;
|
||||
; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
|
||||
;
|
||||
; Use of this source code is governed by a BSD-style license
|
||||
; that can be found in the LICENSE file in the root of the source
|
||||
; tree. An additional intellectual property rights grant can be found
|
||||
; in the file PATENTS. All contributing project authors may
|
||||
; be found in the AUTHORS file in the root of the source tree.
|
||||
;
|
||||
|
||||
|
||||
EXPORT |vp8_copy_mem8x4_v6|
|
||||
; ARM
|
||||
; REQUIRE8
|
||||
; PRESERVE8
|
||||
|
||||
AREA Block, CODE, READONLY ; name this block of code
|
||||
;void vp8_copy_mem8x4_v6( unsigned char *src, int src_stride, unsigned char *dst, int dst_stride)
|
||||
;-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
|
||||
|vp8_copy_mem8x4_v6| PROC
|
||||
;push {r4-r5}
|
||||
stmdb sp!, {r4-r5}
|
||||
|
||||
;preload
|
||||
pld [r0]
|
||||
pld [r0, r1]
|
||||
pld [r0, r1, lsl #1]
|
||||
|
||||
ands r4, r0, #7
|
||||
beq copy_mem8x4_fast
|
||||
|
||||
ands r4, r0, #3
|
||||
beq copy_mem8x4_4
|
||||
|
||||
;copy 1 byte each time
|
||||
ldrb r4, [r0]
|
||||
ldrb r5, [r0, #1]
|
||||
|
||||
mov r12, #4
|
||||
|
||||
copy_mem8x4_1_loop
|
||||
strb r4, [r2]
|
||||
strb r5, [r2, #1]
|
||||
|
||||
ldrb r4, [r0, #2]
|
||||
ldrb r5, [r0, #3]
|
||||
|
||||
subs r12, r12, #1
|
||||
|
||||
strb r4, [r2, #2]
|
||||
strb r5, [r2, #3]
|
||||
|
||||
ldrb r4, [r0, #4]
|
||||
ldrb r5, [r0, #5]
|
||||
|
||||
strb r4, [r2, #4]
|
||||
strb r5, [r2, #5]
|
||||
|
||||
ldrb r4, [r0, #6]
|
||||
ldrb r5, [r0, #7]
|
||||
|
||||
add r0, r0, r1
|
||||
|
||||
strb r4, [r2, #6]
|
||||
strb r5, [r2, #7]
|
||||
|
||||
add r2, r2, r3
|
||||
|
||||
ldrneb r4, [r0]
|
||||
ldrneb r5, [r0, #1]
|
||||
|
||||
bne copy_mem8x4_1_loop
|
||||
|
||||
ldmia sp!, {r4 - r5}
|
||||
;pop {r4-r5}
|
||||
mov pc, lr
|
||||
|
||||
;copy 4 bytes each time
|
||||
copy_mem8x4_4
|
||||
ldr r4, [r0]
|
||||
ldr r5, [r0, #4]
|
||||
|
||||
mov r12, #4
|
||||
|
||||
copy_mem8x4_4_loop
|
||||
subs r12, r12, #1
|
||||
add r0, r0, r1
|
||||
|
||||
str r4, [r2]
|
||||
str r5, [r2, #4]
|
||||
|
||||
add r2, r2, r3
|
||||
|
||||
ldrne r4, [r0]
|
||||
ldrne r5, [r0, #4]
|
||||
|
||||
bne copy_mem8x4_4_loop
|
||||
|
||||
ldmia sp!, {r4-r5}
|
||||
;pop {r4-r5}
|
||||
mov pc, lr
|
||||
|
||||
;copy 8 bytes each time
|
||||
copy_mem8x4_fast
|
||||
;sub r1, r1, #8
|
||||
;sub r3, r3, #8
|
||||
|
||||
mov r12, #4
|
||||
|
||||
copy_mem8x4_fast_loop
|
||||
ldmia r0, {r4-r5}
|
||||
;ldm r0, {r4-r5}
|
||||
add r0, r0, r1
|
||||
|
||||
subs r12, r12, #1
|
||||
stmia r2, {r4-r5}
|
||||
;stm r2, {r4-r5}
|
||||
add r2, r2, r3
|
||||
|
||||
bne copy_mem8x4_fast_loop
|
||||
|
||||
ldmia sp!, {r4-r5}
|
||||
;pop {r4-r5}
|
||||
mov pc, lr
|
||||
|
||||
ENDP ; |vp8_copy_mem8x4_v6|
|
||||
|
||||
END
|
||||
@@ -1,128 +0,0 @@
|
||||
;
|
||||
; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
|
||||
;
|
||||
; Use of this source code is governed by a BSD-style license
|
||||
; that can be found in the LICENSE file in the root of the source
|
||||
; tree. An additional intellectual property rights grant can be found
|
||||
; in the file PATENTS. All contributing project authors may
|
||||
; be found in the AUTHORS file in the root of the source tree.
|
||||
;
|
||||
|
||||
|
||||
EXPORT |vp8_copy_mem8x8_v6|
|
||||
; ARM
|
||||
; REQUIRE8
|
||||
; PRESERVE8
|
||||
|
||||
AREA Block, CODE, READONLY ; name this block of code
|
||||
;void copy_mem8x8_v6( unsigned char *src, int src_stride, unsigned char *dst, int dst_stride)
|
||||
;-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
|
||||
|vp8_copy_mem8x8_v6| PROC
|
||||
;push {r4-r5}
|
||||
stmdb sp!, {r4-r5}
|
||||
|
||||
;preload
|
||||
pld [r0]
|
||||
pld [r0, r1]
|
||||
pld [r0, r1, lsl #1]
|
||||
|
||||
ands r4, r0, #7
|
||||
beq copy_mem8x8_fast
|
||||
|
||||
ands r4, r0, #3
|
||||
beq copy_mem8x8_4
|
||||
|
||||
;copy 1 byte each time
|
||||
ldrb r4, [r0]
|
||||
ldrb r5, [r0, #1]
|
||||
|
||||
mov r12, #8
|
||||
|
||||
copy_mem8x8_1_loop
|
||||
strb r4, [r2]
|
||||
strb r5, [r2, #1]
|
||||
|
||||
ldrb r4, [r0, #2]
|
||||
ldrb r5, [r0, #3]
|
||||
|
||||
subs r12, r12, #1
|
||||
|
||||
strb r4, [r2, #2]
|
||||
strb r5, [r2, #3]
|
||||
|
||||
ldrb r4, [r0, #4]
|
||||
ldrb r5, [r0, #5]
|
||||
|
||||
strb r4, [r2, #4]
|
||||
strb r5, [r2, #5]
|
||||
|
||||
ldrb r4, [r0, #6]
|
||||
ldrb r5, [r0, #7]
|
||||
|
||||
add r0, r0, r1
|
||||
|
||||
strb r4, [r2, #6]
|
||||
strb r5, [r2, #7]
|
||||
|
||||
add r2, r2, r3
|
||||
|
||||
ldrneb r4, [r0]
|
||||
ldrneb r5, [r0, #1]
|
||||
|
||||
bne copy_mem8x8_1_loop
|
||||
|
||||
ldmia sp!, {r4 - r5}
|
||||
;pop {r4-r5}
|
||||
mov pc, lr
|
||||
|
||||
;copy 4 bytes each time
|
||||
copy_mem8x8_4
|
||||
ldr r4, [r0]
|
||||
ldr r5, [r0, #4]
|
||||
|
||||
mov r12, #8
|
||||
|
||||
copy_mem8x8_4_loop
|
||||
subs r12, r12, #1
|
||||
add r0, r0, r1
|
||||
|
||||
str r4, [r2]
|
||||
str r5, [r2, #4]
|
||||
|
||||
add r2, r2, r3
|
||||
|
||||
ldrne r4, [r0]
|
||||
ldrne r5, [r0, #4]
|
||||
|
||||
bne copy_mem8x8_4_loop
|
||||
|
||||
ldmia sp!, {r4 - r5}
|
||||
;pop {r4-r5}
|
||||
mov pc, lr
|
||||
|
||||
;copy 8 bytes each time
|
||||
copy_mem8x8_fast
|
||||
;sub r1, r1, #8
|
||||
;sub r3, r3, #8
|
||||
|
||||
mov r12, #8
|
||||
|
||||
copy_mem8x8_fast_loop
|
||||
ldmia r0, {r4-r5}
|
||||
;ldm r0, {r4-r5}
|
||||
add r0, r0, r1
|
||||
|
||||
subs r12, r12, #1
|
||||
stmia r2, {r4-r5}
|
||||
;stm r2, {r4-r5}
|
||||
add r2, r2, r3
|
||||
|
||||
bne copy_mem8x8_fast_loop
|
||||
|
||||
ldmia sp!, {r4-r5}
|
||||
;pop {r4-r5}
|
||||
mov pc, lr
|
||||
|
||||
ENDP ; |vp8_copy_mem8x8_v6|
|
||||
|
||||
END
|
||||
@@ -1,70 +0,0 @@
|
||||
;
|
||||
; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
|
||||
;
|
||||
; Use of this source code is governed by a BSD-style license and patent
|
||||
; grant that can be found in the LICENSE file in the root of the source
|
||||
; tree. All contributing project authors may be found in the AUTHORS
|
||||
; file in the root of the source tree.
|
||||
;
|
||||
|
||||
EXPORT |vp8_dc_only_idct_add_v6|
|
||||
|
||||
AREA |.text|, CODE, READONLY
|
||||
|
||||
;void vp8_dc_only_idct_add_c(short input_dc, unsigned char *pred_ptr,
|
||||
; int pred_stride, unsigned char *dst_ptr,
|
||||
; int dst_stride)
|
||||
; r0 input_dc
|
||||
; r1 pred_ptr
|
||||
; r2 pred_stride
|
||||
; r3 dst_ptr
|
||||
; sp dst_stride
|
||||
|
||||
|vp8_dc_only_idct_add_v6| PROC
|
||||
stmdb sp!, {r4 - r7}
|
||||
|
||||
add r0, r0, #4 ; input_dc += 4
|
||||
ldr r12, c0x0000FFFF
|
||||
ldr r4, [r1], r2
|
||||
and r0, r12, r0, asr #3 ; input_dc >> 3 + mask
|
||||
ldr r6, [r1], r2
|
||||
orr r0, r0, r0, lsl #16 ; a1 | a1
|
||||
|
||||
ldr r12, [sp, #16] ; dst stride
|
||||
|
||||
uxtab16 r5, r0, r4 ; a1+2 | a1+0
|
||||
uxtab16 r4, r0, r4, ror #8 ; a1+3 | a1+1
|
||||
uxtab16 r7, r0, r6
|
||||
uxtab16 r6, r0, r6, ror #8
|
||||
usat16 r5, #8, r5
|
||||
usat16 r4, #8, r4
|
||||
usat16 r7, #8, r7
|
||||
usat16 r6, #8, r6
|
||||
orr r5, r5, r4, lsl #8
|
||||
orr r7, r7, r6, lsl #8
|
||||
ldr r4, [r1], r2
|
||||
str r5, [r3], r12
|
||||
ldr r6, [r1]
|
||||
str r7, [r3], r12
|
||||
|
||||
uxtab16 r5, r0, r4
|
||||
uxtab16 r4, r0, r4, ror #8
|
||||
uxtab16 r7, r0, r6
|
||||
uxtab16 r6, r0, r6, ror #8
|
||||
usat16 r5, #8, r5
|
||||
usat16 r4, #8, r4
|
||||
usat16 r7, #8, r7
|
||||
usat16 r6, #8, r6
|
||||
orr r5, r5, r4, lsl #8
|
||||
orr r7, r7, r6, lsl #8
|
||||
str r5, [r3], r12
|
||||
str r7, [r3]
|
||||
|
||||
ldmia sp!, {r4 - r7}
|
||||
bx lr
|
||||
|
||||
ENDP ; |vp8_dc_only_idct_add_v6|
|
||||
|
||||
; Constant Pool
|
||||
c0x0000FFFF DCD 0x0000FFFF
|
||||
END
|
||||
@@ -1,190 +0,0 @@
|
||||
;
|
||||
; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
|
||||
;
|
||||
; Use of this source code is governed by a BSD-style license and patent
|
||||
; grant that can be found in the LICENSE file in the root of the source
|
||||
; tree. All contributing project authors may be found in the AUTHORS
|
||||
; file in the root of the source tree.
|
||||
;
|
||||
|
||||
EXPORT |vp8_dequant_idct_add_v6|
|
||||
|
||||
AREA |.text|, CODE, READONLY
|
||||
;void vp8_dequant_idct_v6(short *input, short *dq,
|
||||
; unsigned char *dest, int stride)
|
||||
; r0 = q
|
||||
; r1 = dq
|
||||
; r2 = dst
|
||||
; r3 = stride
|
||||
|
||||
|vp8_dequant_idct_add_v6| PROC
|
||||
stmdb sp!, {r4-r11, lr}
|
||||
|
||||
ldr r4, [r0] ;input
|
||||
ldr r5, [r1], #4 ;dq
|
||||
|
||||
sub sp, sp, #4
|
||||
str r3, [sp]
|
||||
|
||||
mov r12, #4
|
||||
|
||||
vp8_dequant_add_loop
|
||||
smulbb r6, r4, r5
|
||||
smultt r7, r4, r5
|
||||
|
||||
ldr r4, [r0, #4] ;input
|
||||
ldr r5, [r1], #4 ;dq
|
||||
|
||||
strh r6, [r0], #2
|
||||
strh r7, [r0], #2
|
||||
|
||||
smulbb r6, r4, r5
|
||||
smultt r7, r4, r5
|
||||
|
||||
subs r12, r12, #1
|
||||
|
||||
ldrne r4, [r0, #4]
|
||||
ldrne r5, [r1], #4
|
||||
|
||||
strh r6, [r0], #2
|
||||
strh r7, [r0], #2
|
||||
|
||||
bne vp8_dequant_add_loop
|
||||
|
||||
sub r0, r0, #32
|
||||
mov r1, r0
|
||||
|
||||
; short_idct4x4llm_v6_dual
|
||||
ldr r3, cospi8sqrt2minus1
|
||||
ldr r4, sinpi8sqrt2
|
||||
ldr r6, [r0, #8]
|
||||
mov r5, #2
|
||||
vp8_dequant_idct_loop1_v6
|
||||
ldr r12, [r0, #24]
|
||||
ldr r14, [r0, #16]
|
||||
smulwt r9, r3, r6
|
||||
smulwb r7, r3, r6
|
||||
smulwt r10, r4, r6
|
||||
smulwb r8, r4, r6
|
||||
pkhbt r7, r7, r9, lsl #16
|
||||
smulwt r11, r3, r12
|
||||
pkhbt r8, r8, r10, lsl #16
|
||||
uadd16 r6, r6, r7
|
||||
smulwt r7, r4, r12
|
||||
smulwb r9, r3, r12
|
||||
smulwb r10, r4, r12
|
||||
subs r5, r5, #1
|
||||
pkhbt r9, r9, r11, lsl #16
|
||||
ldr r11, [r0], #4
|
||||
pkhbt r10, r10, r7, lsl #16
|
||||
uadd16 r7, r12, r9
|
||||
usub16 r7, r8, r7
|
||||
uadd16 r6, r6, r10
|
||||
uadd16 r10, r11, r14
|
||||
usub16 r8, r11, r14
|
||||
uadd16 r9, r10, r6
|
||||
usub16 r10, r10, r6
|
||||
uadd16 r6, r8, r7
|
||||
usub16 r7, r8, r7
|
||||
str r6, [r1, #8]
|
||||
ldrne r6, [r0, #8]
|
||||
str r7, [r1, #16]
|
||||
str r10, [r1, #24]
|
||||
str r9, [r1], #4
|
||||
bne vp8_dequant_idct_loop1_v6
|
||||
|
||||
mov r5, #2
|
||||
sub r0, r1, #8
|
||||
vp8_dequant_idct_loop2_v6
|
||||
ldr r6, [r0], #4
|
||||
ldr r7, [r0], #4
|
||||
ldr r8, [r0], #4
|
||||
ldr r9, [r0], #4
|
||||
smulwt r1, r3, r6
|
||||
smulwt r12, r4, r6
|
||||
smulwt lr, r3, r8
|
||||
smulwt r10, r4, r8
|
||||
pkhbt r11, r8, r6, lsl #16
|
||||
pkhbt r1, lr, r1, lsl #16
|
||||
pkhbt r12, r10, r12, lsl #16
|
||||
pkhtb r6, r6, r8, asr #16
|
||||
uadd16 r6, r1, r6
|
||||
pkhbt lr, r9, r7, lsl #16
|
||||
uadd16 r10, r11, lr
|
||||
usub16 lr, r11, lr
|
||||
pkhtb r8, r7, r9, asr #16
|
||||
subs r5, r5, #1
|
||||
smulwt r1, r3, r8
|
||||
smulwb r7, r3, r8
|
||||
smulwt r11, r4, r8
|
||||
smulwb r9, r4, r8
|
||||
pkhbt r1, r7, r1, lsl #16
|
||||
uadd16 r8, r1, r8
|
||||
pkhbt r11, r9, r11, lsl #16
|
||||
usub16 r1, r12, r8
|
||||
uadd16 r8, r11, r6
|
||||
ldr r9, c0x00040004
|
||||
ldr r12, [sp] ; get stride from stack
|
||||
uadd16 r6, r10, r8
|
||||
usub16 r7, r10, r8
|
||||
uadd16 r7, r7, r9
|
||||
uadd16 r6, r6, r9
|
||||
uadd16 r10, r14, r1
|
||||
usub16 r1, r14, r1
|
||||
uadd16 r10, r10, r9
|
||||
uadd16 r1, r1, r9
|
||||
ldr r11, [r2] ; load input from dst
|
||||
mov r8, r7, asr #3
|
||||
pkhtb r9, r8, r10, asr #19
|
||||
mov r8, r1, asr #3
|
||||
pkhtb r8, r8, r6, asr #19
|
||||
uxtb16 lr, r11, ror #8
|
||||
qadd16 r9, r9, lr
|
||||
uxtb16 lr, r11
|
||||
qadd16 r8, r8, lr
|
||||
usat16 r9, #8, r9
|
||||
usat16 r8, #8, r8
|
||||
orr r9, r8, r9, lsl #8
|
||||
ldr r11, [r2, r12] ; load input from dst
|
||||
mov r7, r7, lsl #16
|
||||
mov r1, r1, lsl #16
|
||||
mov r10, r10, lsl #16
|
||||
mov r6, r6, lsl #16
|
||||
mov r7, r7, asr #3
|
||||
pkhtb r7, r7, r10, asr #19
|
||||
mov r1, r1, asr #3
|
||||
pkhtb r1, r1, r6, asr #19
|
||||
uxtb16 r8, r11, ror #8
|
||||
qadd16 r7, r7, r8
|
||||
uxtb16 r8, r11
|
||||
qadd16 r1, r1, r8
|
||||
usat16 r7, #8, r7
|
||||
usat16 r1, #8, r1
|
||||
orr r1, r1, r7, lsl #8
|
||||
str r9, [r2], r12 ; store output to dst
|
||||
str r1, [r2], r12 ; store output to dst
|
||||
bne vp8_dequant_idct_loop2_v6
|
||||
|
||||
; memset
|
||||
sub r0, r0, #32
|
||||
add sp, sp, #4
|
||||
|
||||
mov r12, #0
|
||||
str r12, [r0]
|
||||
str r12, [r0, #4]
|
||||
str r12, [r0, #8]
|
||||
str r12, [r0, #12]
|
||||
str r12, [r0, #16]
|
||||
str r12, [r0, #20]
|
||||
str r12, [r0, #24]
|
||||
str r12, [r0, #28]
|
||||
|
||||
ldmia sp!, {r4 - r11, pc}
|
||||
ENDP ; |vp8_dequant_idct_add_v6|
|
||||
|
||||
; Constant Pool
|
||||
cospi8sqrt2minus1 DCD 0x00004E7B
|
||||
sinpi8sqrt2 DCD 0x00008A8C
|
||||
c0x00040004 DCD 0x00040004
|
||||
|
||||
END
|
||||
@@ -1,69 +0,0 @@
|
||||
;
|
||||
; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
|
||||
;
|
||||
; Use of this source code is governed by a BSD-style license
|
||||
; that can be found in the LICENSE file in the root of the source
|
||||
; tree. An additional intellectual property rights grant can be found
|
||||
; in the file PATENTS. All contributing project authors may
|
||||
; be found in the AUTHORS file in the root of the source tree.
|
||||
;
|
||||
|
||||
|
||||
EXPORT |vp8_dequantize_b_loop_v6|
|
||||
|
||||
AREA |.text|, CODE, READONLY ; name this block of code
|
||||
;-------------------------------
|
||||
;void vp8_dequantize_b_loop_v6(short *Q, short *DQC, short *DQ);
|
||||
; r0 short *Q,
|
||||
; r1 short *DQC
|
||||
; r2 short *DQ
|
||||
|vp8_dequantize_b_loop_v6| PROC
|
||||
stmdb sp!, {r4-r9, lr}
|
||||
|
||||
ldr r3, [r0] ;load Q
|
||||
ldr r4, [r1] ;load DQC
|
||||
ldr r5, [r0, #4]
|
||||
ldr r6, [r1, #4]
|
||||
|
||||
mov r12, #2 ;loop counter
|
||||
|
||||
dequant_loop
|
||||
smulbb r7, r3, r4 ;multiply
|
||||
smultt r8, r3, r4
|
||||
smulbb r9, r5, r6
|
||||
smultt lr, r5, r6
|
||||
|
||||
ldr r3, [r0, #8]
|
||||
ldr r4, [r1, #8]
|
||||
ldr r5, [r0, #12]
|
||||
ldr r6, [r1, #12]
|
||||
|
||||
strh r7, [r2], #2 ;store result
|
||||
smulbb r7, r3, r4 ;multiply
|
||||
strh r8, [r2], #2
|
||||
smultt r8, r3, r4
|
||||
strh r9, [r2], #2
|
||||
smulbb r9, r5, r6
|
||||
strh lr, [r2], #2
|
||||
smultt lr, r5, r6
|
||||
|
||||
subs r12, r12, #1
|
||||
|
||||
add r0, r0, #16
|
||||
add r1, r1, #16
|
||||
|
||||
ldrne r3, [r0]
|
||||
strh r7, [r2], #2 ;store result
|
||||
ldrne r4, [r1]
|
||||
strh r8, [r2], #2
|
||||
ldrne r5, [r0, #4]
|
||||
strh r9, [r2], #2
|
||||
ldrne r6, [r1, #4]
|
||||
strh lr, [r2], #2
|
||||
|
||||
bne dequant_loop
|
||||
|
||||
ldmia sp!, {r4-r9, pc}
|
||||
ENDP ;|vp8_dequantize_b_loop_v6|
|
||||
|
||||
END
|
||||
@@ -1,624 +0,0 @@
|
||||
;
|
||||
; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
|
||||
;
|
||||
; Use of this source code is governed by a BSD-style license
|
||||
; that can be found in the LICENSE file in the root of the source
|
||||
; tree. An additional intellectual property rights grant can be found
|
||||
; in the file PATENTS. All contributing project authors may
|
||||
; be found in the AUTHORS file in the root of the source tree.
|
||||
;
|
||||
|
||||
|
||||
EXPORT |vp8_filter_block2d_first_pass_armv6|
|
||||
EXPORT |vp8_filter_block2d_first_pass_16x16_armv6|
|
||||
EXPORT |vp8_filter_block2d_first_pass_8x8_armv6|
|
||||
EXPORT |vp8_filter_block2d_second_pass_armv6|
|
||||
EXPORT |vp8_filter4_block2d_second_pass_armv6|
|
||||
EXPORT |vp8_filter_block2d_first_pass_only_armv6|
|
||||
EXPORT |vp8_filter_block2d_second_pass_only_armv6|
|
||||
|
||||
AREA |.text|, CODE, READONLY ; name this block of code
|
||||
;-------------------------------------
|
||||
; r0 unsigned char *src_ptr
|
||||
; r1 short *output_ptr
|
||||
; r2 unsigned int src_pixels_per_line
|
||||
; r3 unsigned int output_width
|
||||
; stack unsigned int output_height
|
||||
; stack const short *vp8_filter
|
||||
;-------------------------------------
|
||||
; vp8_filter the input and put in the output array. Apply the 6 tap FIR filter with
|
||||
; the output being a 2 byte value and the intput being a 1 byte value.
|
||||
|vp8_filter_block2d_first_pass_armv6| PROC
|
||||
stmdb sp!, {r4 - r11, lr}
|
||||
|
||||
ldr r11, [sp, #40] ; vp8_filter address
|
||||
ldr r7, [sp, #36] ; output height
|
||||
|
||||
sub r2, r2, r3 ; inside loop increments input array,
|
||||
; so the height loop only needs to add
|
||||
; r2 - width to the input pointer
|
||||
|
||||
mov r3, r3, lsl #1 ; multiply width by 2 because using shorts
|
||||
add r12, r3, #16 ; square off the output
|
||||
sub sp, sp, #4
|
||||
|
||||
ldr r4, [r11] ; load up packed filter coefficients
|
||||
ldr r5, [r11, #4]
|
||||
ldr r6, [r11, #8]
|
||||
|
||||
str r1, [sp] ; push destination to stack
|
||||
mov r7, r7, lsl #16 ; height is top part of counter
|
||||
|
||||
; six tap filter
|
||||
|height_loop_1st_6|
|
||||
ldrb r8, [r0, #-2] ; load source data
|
||||
ldrb r9, [r0, #-1]
|
||||
ldrb r10, [r0], #2
|
||||
orr r7, r7, r3, lsr #2 ; construct loop counter
|
||||
|
||||
|width_loop_1st_6|
|
||||
ldrb r11, [r0, #-1]
|
||||
|
||||
pkhbt lr, r8, r9, lsl #16 ; r9 | r8
|
||||
pkhbt r8, r9, r10, lsl #16 ; r10 | r9
|
||||
|
||||
ldrb r9, [r0]
|
||||
|
||||
smuad lr, lr, r4 ; apply the filter
|
||||
pkhbt r10, r10, r11, lsl #16 ; r11 | r10
|
||||
smuad r8, r8, r4
|
||||
pkhbt r11, r11, r9, lsl #16 ; r9 | r11
|
||||
|
||||
smlad lr, r10, r5, lr
|
||||
ldrb r10, [r0, #1]
|
||||
smlad r8, r11, r5, r8
|
||||
ldrb r11, [r0, #2]
|
||||
|
||||
sub r7, r7, #1
|
||||
|
||||
pkhbt r9, r9, r10, lsl #16 ; r10 | r9
|
||||
pkhbt r10, r10, r11, lsl #16 ; r11 | r10
|
||||
|
||||
smlad lr, r9, r6, lr
|
||||
smlad r11, r10, r6, r8
|
||||
|
||||
ands r10, r7, #0xff ; test loop counter
|
||||
|
||||
add lr, lr, #0x40 ; round_shift_and_clamp
|
||||
ldrneb r8, [r0, #-2] ; load data for next loop
|
||||
usat lr, #8, lr, asr #7
|
||||
add r11, r11, #0x40
|
||||
ldrneb r9, [r0, #-1]
|
||||
usat r11, #8, r11, asr #7
|
||||
|
||||
strh lr, [r1], r12 ; result is transposed and stored, which
|
||||
; will make second pass filtering easier.
|
||||
ldrneb r10, [r0], #2
|
||||
strh r11, [r1], r12
|
||||
|
||||
bne width_loop_1st_6
|
||||
|
||||
ldr r1, [sp] ; load and update dst address
|
||||
subs r7, r7, #0x10000
|
||||
add r0, r0, r2 ; move to next input line
|
||||
|
||||
add r1, r1, #2 ; move over to next column
|
||||
str r1, [sp]
|
||||
|
||||
bne height_loop_1st_6
|
||||
|
||||
add sp, sp, #4
|
||||
ldmia sp!, {r4 - r11, pc}
|
||||
|
||||
ENDP
|
||||
|
||||
; --------------------------
|
||||
; 16x16 version
|
||||
; -----------------------------
|
||||
|vp8_filter_block2d_first_pass_16x16_armv6| PROC
|
||||
stmdb sp!, {r4 - r11, lr}
|
||||
|
||||
ldr r11, [sp, #40] ; vp8_filter address
|
||||
ldr r7, [sp, #36] ; output height
|
||||
|
||||
add r4, r2, #18 ; preload next low
|
||||
pld [r0, r4]
|
||||
|
||||
sub r2, r2, r3 ; inside loop increments input array,
|
||||
; so the height loop only needs to add
|
||||
; r2 - width to the input pointer
|
||||
|
||||
mov r3, r3, lsl #1 ; multiply width by 2 because using shorts
|
||||
add r12, r3, #16 ; square off the output
|
||||
sub sp, sp, #4
|
||||
|
||||
ldr r4, [r11] ; load up packed filter coefficients
|
||||
ldr r5, [r11, #4]
|
||||
ldr r6, [r11, #8]
|
||||
|
||||
str r1, [sp] ; push destination to stack
|
||||
mov r7, r7, lsl #16 ; height is top part of counter
|
||||
|
||||
; six tap filter
|
||||
|height_loop_1st_16_6|
|
||||
ldrb r8, [r0, #-2] ; load source data
|
||||
ldrb r9, [r0, #-1]
|
||||
ldrb r10, [r0], #2
|
||||
orr r7, r7, r3, lsr #2 ; construct loop counter
|
||||
|
||||
|width_loop_1st_16_6|
|
||||
ldrb r11, [r0, #-1]
|
||||
|
||||
pkhbt lr, r8, r9, lsl #16 ; r9 | r8
|
||||
pkhbt r8, r9, r10, lsl #16 ; r10 | r9
|
||||
|
||||
ldrb r9, [r0]
|
||||
|
||||
smuad lr, lr, r4 ; apply the filter
|
||||
pkhbt r10, r10, r11, lsl #16 ; r11 | r10
|
||||
smuad r8, r8, r4
|
||||
pkhbt r11, r11, r9, lsl #16 ; r9 | r11
|
||||
|
||||
smlad lr, r10, r5, lr
|
||||
ldrb r10, [r0, #1]
|
||||
smlad r8, r11, r5, r8
|
||||
ldrb r11, [r0, #2]
|
||||
|
||||
sub r7, r7, #1
|
||||
|
||||
pkhbt r9, r9, r10, lsl #16 ; r10 | r9
|
||||
pkhbt r10, r10, r11, lsl #16 ; r11 | r10
|
||||
|
||||
smlad lr, r9, r6, lr
|
||||
smlad r11, r10, r6, r8
|
||||
|
||||
ands r10, r7, #0xff ; test loop counter
|
||||
|
||||
add lr, lr, #0x40 ; round_shift_and_clamp
|
||||
ldrneb r8, [r0, #-2] ; load data for next loop
|
||||
usat lr, #8, lr, asr #7
|
||||
add r11, r11, #0x40
|
||||
ldrneb r9, [r0, #-1]
|
||||
usat r11, #8, r11, asr #7
|
||||
|
||||
strh lr, [r1], r12 ; result is transposed and stored, which
|
||||
; will make second pass filtering easier.
|
||||
ldrneb r10, [r0], #2
|
||||
strh r11, [r1], r12
|
||||
|
||||
bne width_loop_1st_16_6
|
||||
|
||||
ldr r1, [sp] ; load and update dst address
|
||||
subs r7, r7, #0x10000
|
||||
add r0, r0, r2 ; move to next input line
|
||||
|
||||
add r11, r2, #34 ; adding back block width(=16)
|
||||
pld [r0, r11] ; preload next row
|
||||
|
||||
add r1, r1, #2 ; move over to next column
|
||||
str r1, [sp]
|
||||
|
||||
bne height_loop_1st_16_6
|
||||
|
||||
add sp, sp, #4
|
||||
ldmia sp!, {r4 - r11, pc}
|
||||
|
||||
ENDP
|
||||
|
||||
; --------------------------
|
||||
; 8x8 version
|
||||
; -----------------------------
|
||||
|vp8_filter_block2d_first_pass_8x8_armv6| PROC
|
||||
stmdb sp!, {r4 - r11, lr}
|
||||
|
||||
ldr r11, [sp, #40] ; vp8_filter address
|
||||
ldr r7, [sp, #36] ; output height
|
||||
|
||||
add r4, r2, #10 ; preload next row
|
||||
pld [r0, r4]
|
||||
|
||||
sub r2, r2, r3 ; inside loop increments input array,
|
||||
; so the height loop only needs to add
|
||||
; r2 - width to the input pointer
|
||||
|
||||
mov r3, r3, lsl #1 ; multiply width by 2 because using shorts
|
||||
add r12, r3, #16 ; square off the output
|
||||
sub sp, sp, #4
|
||||
|
||||
ldr r4, [r11] ; load up packed filter coefficients
|
||||
ldr r5, [r11, #4]
|
||||
ldr r6, [r11, #8]
|
||||
|
||||
str r1, [sp] ; push destination to stack
|
||||
mov r7, r7, lsl #16 ; height is top part of counter
|
||||
|
||||
; six tap filter
|
||||
|height_loop_1st_8_6|
|
||||
ldrb r8, [r0, #-2] ; load source data
|
||||
ldrb r9, [r0, #-1]
|
||||
ldrb r10, [r0], #2
|
||||
orr r7, r7, r3, lsr #2 ; construct loop counter
|
||||
|
||||
|width_loop_1st_8_6|
|
||||
ldrb r11, [r0, #-1]
|
||||
|
||||
pkhbt lr, r8, r9, lsl #16 ; r9 | r8
|
||||
pkhbt r8, r9, r10, lsl #16 ; r10 | r9
|
||||
|
||||
ldrb r9, [r0]
|
||||
|
||||
smuad lr, lr, r4 ; apply the filter
|
||||
pkhbt r10, r10, r11, lsl #16 ; r11 | r10
|
||||
smuad r8, r8, r4
|
||||
pkhbt r11, r11, r9, lsl #16 ; r9 | r11
|
||||
|
||||
smlad lr, r10, r5, lr
|
||||
ldrb r10, [r0, #1]
|
||||
smlad r8, r11, r5, r8
|
||||
ldrb r11, [r0, #2]
|
||||
|
||||
sub r7, r7, #1
|
||||
|
||||
pkhbt r9, r9, r10, lsl #16 ; r10 | r9
|
||||
pkhbt r10, r10, r11, lsl #16 ; r11 | r10
|
||||
|
||||
smlad lr, r9, r6, lr
|
||||
smlad r11, r10, r6, r8
|
||||
|
||||
ands r10, r7, #0xff ; test loop counter
|
||||
|
||||
add lr, lr, #0x40 ; round_shift_and_clamp
|
||||
ldrneb r8, [r0, #-2] ; load data for next loop
|
||||
usat lr, #8, lr, asr #7
|
||||
add r11, r11, #0x40
|
||||
ldrneb r9, [r0, #-1]
|
||||
usat r11, #8, r11, asr #7
|
||||
|
||||
strh lr, [r1], r12 ; result is transposed and stored, which
|
||||
; will make second pass filtering easier.
|
||||
ldrneb r10, [r0], #2
|
||||
strh r11, [r1], r12
|
||||
|
||||
bne width_loop_1st_8_6
|
||||
|
||||
ldr r1, [sp] ; load and update dst address
|
||||
subs r7, r7, #0x10000
|
||||
add r0, r0, r2 ; move to next input line
|
||||
|
||||
add r11, r2, #18 ; adding back block width(=8)
|
||||
pld [r0, r11] ; preload next row
|
||||
|
||||
add r1, r1, #2 ; move over to next column
|
||||
str r1, [sp]
|
||||
|
||||
bne height_loop_1st_8_6
|
||||
|
||||
add sp, sp, #4
|
||||
ldmia sp!, {r4 - r11, pc}
|
||||
|
||||
ENDP
|
||||
|
||||
;---------------------------------
|
||||
; r0 short *src_ptr,
|
||||
; r1 unsigned char *output_ptr,
|
||||
; r2 unsigned int output_pitch,
|
||||
; r3 unsigned int cnt,
|
||||
; stack const short *vp8_filter
|
||||
;---------------------------------
|
||||
|vp8_filter_block2d_second_pass_armv6| PROC
|
||||
stmdb sp!, {r4 - r11, lr}
|
||||
|
||||
ldr r11, [sp, #36] ; vp8_filter address
|
||||
sub sp, sp, #4
|
||||
mov r7, r3, lsl #16 ; height is top part of counter
|
||||
str r1, [sp] ; push destination to stack
|
||||
|
||||
ldr r4, [r11] ; load up packed filter coefficients
|
||||
ldr r5, [r11, #4]
|
||||
ldr r6, [r11, #8]
|
||||
|
||||
pkhbt r12, r5, r4 ; pack the filter differently
|
||||
pkhbt r11, r6, r5
|
||||
|
||||
sub r0, r0, #4 ; offset input buffer
|
||||
|
||||
|height_loop_2nd|
|
||||
ldr r8, [r0] ; load the data
|
||||
ldr r9, [r0, #4]
|
||||
orr r7, r7, r3, lsr #1 ; loop counter
|
||||
|
||||
|width_loop_2nd|
|
||||
smuad lr, r4, r8 ; apply filter
|
||||
sub r7, r7, #1
|
||||
smulbt r8, r4, r8
|
||||
|
||||
ldr r10, [r0, #8]
|
||||
|
||||
smlad lr, r5, r9, lr
|
||||
smladx r8, r12, r9, r8
|
||||
|
||||
ldrh r9, [r0, #12]
|
||||
|
||||
smlad lr, r6, r10, lr
|
||||
smladx r8, r11, r10, r8
|
||||
|
||||
add r0, r0, #4
|
||||
smlatb r10, r6, r9, r8
|
||||
|
||||
add lr, lr, #0x40 ; round_shift_and_clamp
|
||||
ands r8, r7, #0xff
|
||||
usat lr, #8, lr, asr #7
|
||||
add r10, r10, #0x40
|
||||
strb lr, [r1], r2 ; the result is transposed back and stored
|
||||
usat r10, #8, r10, asr #7
|
||||
|
||||
ldrne r8, [r0] ; load data for next loop
|
||||
ldrne r9, [r0, #4]
|
||||
strb r10, [r1], r2
|
||||
|
||||
bne width_loop_2nd
|
||||
|
||||
ldr r1, [sp] ; update dst for next loop
|
||||
subs r7, r7, #0x10000
|
||||
add r0, r0, #16 ; update src for next loop
|
||||
add r1, r1, #1
|
||||
str r1, [sp]
|
||||
|
||||
bne height_loop_2nd
|
||||
|
||||
add sp, sp, #4
|
||||
ldmia sp!, {r4 - r11, pc}
|
||||
|
||||
ENDP
|
||||
|
||||
;---------------------------------
|
||||
; r0 short *src_ptr,
|
||||
; r1 unsigned char *output_ptr,
|
||||
; r2 unsigned int output_pitch,
|
||||
; r3 unsigned int cnt,
|
||||
; stack const short *vp8_filter
|
||||
;---------------------------------
|
||||
|vp8_filter4_block2d_second_pass_armv6| PROC
|
||||
stmdb sp!, {r4 - r11, lr}
|
||||
|
||||
ldr r11, [sp, #36] ; vp8_filter address
|
||||
mov r7, r3, lsl #16 ; height is top part of counter
|
||||
|
||||
ldr r4, [r11] ; load up packed filter coefficients
|
||||
add lr, r1, r3 ; save final destination pointer
|
||||
ldr r5, [r11, #4]
|
||||
ldr r6, [r11, #8]
|
||||
|
||||
pkhbt r12, r5, r4 ; pack the filter differently
|
||||
pkhbt r11, r6, r5
|
||||
mov r4, #0x40 ; rounding factor (for smlad{x})
|
||||
|
||||
|height_loop_2nd_4|
|
||||
ldrd r8, r9, [r0, #-4] ; load the data
|
||||
orr r7, r7, r3, lsr #1 ; loop counter
|
||||
|
||||
|width_loop_2nd_4|
|
||||
ldr r10, [r0, #4]!
|
||||
smladx r6, r9, r12, r4 ; apply filter
|
||||
pkhbt r8, r9, r8
|
||||
smlad r5, r8, r12, r4
|
||||
pkhbt r8, r10, r9
|
||||
smladx r6, r10, r11, r6
|
||||
sub r7, r7, #1
|
||||
smlad r5, r8, r11, r5
|
||||
|
||||
mov r8, r9 ; shift the data for the next loop
|
||||
mov r9, r10
|
||||
|
||||
usat r6, #8, r6, asr #7 ; shift and clamp
|
||||
usat r5, #8, r5, asr #7
|
||||
|
||||
strb r5, [r1], r2 ; the result is transposed back and stored
|
||||
tst r7, #0xff
|
||||
strb r6, [r1], r2
|
||||
|
||||
bne width_loop_2nd_4
|
||||
|
||||
subs r7, r7, #0x10000
|
||||
add r0, r0, #16 ; update src for next loop
|
||||
sub r1, lr, r7, lsr #16 ; update dst for next loop
|
||||
|
||||
bne height_loop_2nd_4
|
||||
|
||||
ldmia sp!, {r4 - r11, pc}
|
||||
|
||||
ENDP
|
||||
|
||||
;------------------------------------
|
||||
; r0 unsigned char *src_ptr
|
||||
; r1 unsigned char *output_ptr,
|
||||
; r2 unsigned int src_pixels_per_line
|
||||
; r3 unsigned int cnt,
|
||||
; stack unsigned int output_pitch,
|
||||
; stack const short *vp8_filter
|
||||
;------------------------------------
|
||||
|vp8_filter_block2d_first_pass_only_armv6| PROC
|
||||
stmdb sp!, {r4 - r11, lr}
|
||||
|
||||
add r7, r2, r3 ; preload next row
|
||||
add r7, r7, #2
|
||||
pld [r0, r7]
|
||||
|
||||
ldr r4, [sp, #36] ; output pitch
|
||||
ldr r11, [sp, #40] ; HFilter address
|
||||
sub sp, sp, #8
|
||||
|
||||
mov r7, r3
|
||||
sub r2, r2, r3 ; inside loop increments input array,
|
||||
; so the height loop only needs to add
|
||||
; r2 - width to the input pointer
|
||||
|
||||
sub r4, r4, r3
|
||||
str r4, [sp] ; save modified output pitch
|
||||
str r2, [sp, #4]
|
||||
|
||||
mov r2, #0x40
|
||||
|
||||
ldr r4, [r11] ; load up packed filter coefficients
|
||||
ldr r5, [r11, #4]
|
||||
ldr r6, [r11, #8]
|
||||
|
||||
; six tap filter
|
||||
|height_loop_1st_only_6|
|
||||
ldrb r8, [r0, #-2] ; load data
|
||||
ldrb r9, [r0, #-1]
|
||||
ldrb r10, [r0], #2
|
||||
|
||||
mov r12, r3, lsr #1 ; loop counter
|
||||
|
||||
|width_loop_1st_only_6|
|
||||
ldrb r11, [r0, #-1]
|
||||
|
||||
pkhbt lr, r8, r9, lsl #16 ; r9 | r8
|
||||
pkhbt r8, r9, r10, lsl #16 ; r10 | r9
|
||||
|
||||
ldrb r9, [r0]
|
||||
|
||||
;; smuad lr, lr, r4
|
||||
smlad lr, lr, r4, r2
|
||||
pkhbt r10, r10, r11, lsl #16 ; r11 | r10
|
||||
;; smuad r8, r8, r4
|
||||
smlad r8, r8, r4, r2
|
||||
pkhbt r11, r11, r9, lsl #16 ; r9 | r11
|
||||
|
||||
smlad lr, r10, r5, lr
|
||||
ldrb r10, [r0, #1]
|
||||
smlad r8, r11, r5, r8
|
||||
ldrb r11, [r0, #2]
|
||||
|
||||
subs r12, r12, #1
|
||||
|
||||
pkhbt r9, r9, r10, lsl #16 ; r10 | r9
|
||||
pkhbt r10, r10, r11, lsl #16 ; r11 | r10
|
||||
|
||||
smlad lr, r9, r6, lr
|
||||
smlad r10, r10, r6, r8
|
||||
|
||||
;; add lr, lr, #0x40 ; round_shift_and_clamp
|
||||
ldrneb r8, [r0, #-2] ; load data for next loop
|
||||
usat lr, #8, lr, asr #7
|
||||
;; add r10, r10, #0x40
|
||||
strb lr, [r1], #1 ; store the result
|
||||
usat r10, #8, r10, asr #7
|
||||
|
||||
ldrneb r9, [r0, #-1]
|
||||
strb r10, [r1], #1
|
||||
ldrneb r10, [r0], #2
|
||||
|
||||
bne width_loop_1st_only_6
|
||||
|
||||
ldr lr, [sp] ; load back output pitch
|
||||
ldr r12, [sp, #4] ; load back adjusted src stride
|
||||
subs r7, r7, #1
|
||||
add r0, r0, r12 ; update src for next loop
|
||||
|
||||
add r11, r12, r3 ; preload next row
|
||||
add r11, r11, #2
|
||||
pld [r0, r11]
|
||||
|
||||
add r1, r1, lr ; update dst for next loop
|
||||
|
||||
bne height_loop_1st_only_6
|
||||
|
||||
add sp, sp, #8
|
||||
ldmia sp!, {r4 - r11, pc}
|
||||
ENDP ; |vp8_filter_block2d_first_pass_only_armv6|
|
||||
|
||||
|
||||
;------------------------------------
|
||||
; r0 unsigned char *src_ptr,
|
||||
; r1 unsigned char *output_ptr,
|
||||
; r2 unsigned int src_pixels_per_line
|
||||
; r3 unsigned int cnt,
|
||||
; stack unsigned int output_pitch,
|
||||
; stack const short *vp8_filter
|
||||
;------------------------------------
|
||||
|vp8_filter_block2d_second_pass_only_armv6| PROC
|
||||
stmdb sp!, {r4 - r11, lr}
|
||||
|
||||
ldr r11, [sp, #40] ; VFilter address
|
||||
ldr r12, [sp, #36] ; output pitch
|
||||
|
||||
mov r7, r3, lsl #16 ; height is top part of counter
|
||||
sub r0, r0, r2, lsl #1 ; need 6 elements for filtering, 2 before, 3 after
|
||||
|
||||
sub sp, sp, #8
|
||||
|
||||
ldr r4, [r11] ; load up packed filter coefficients
|
||||
ldr r5, [r11, #4]
|
||||
ldr r6, [r11, #8]
|
||||
|
||||
str r0, [sp] ; save r0 to stack
|
||||
str r1, [sp, #4] ; save dst to stack
|
||||
|
||||
; six tap filter
|
||||
|width_loop_2nd_only_6|
|
||||
ldrb r8, [r0], r2 ; load data
|
||||
orr r7, r7, r3 ; loop counter
|
||||
ldrb r9, [r0], r2
|
||||
ldrb r10, [r0], r2
|
||||
|
||||
|height_loop_2nd_only_6|
|
||||
; filter first column in this inner loop, then move to the next column.
|
||||
ldrb r11, [r0], r2
|
||||
|
||||
pkhbt lr, r8, r9, lsl #16 ; r9 | r8
|
||||
pkhbt r8, r9, r10, lsl #16 ; r10 | r9
|
||||
|
||||
ldrb r9, [r0], r2
|
||||
|
||||
smuad lr, lr, r4
|
||||
pkhbt r10, r10, r11, lsl #16 ; r11 | r10
|
||||
smuad r8, r8, r4
|
||||
pkhbt r11, r11, r9, lsl #16 ; r9 | r11
|
||||
|
||||
smlad lr, r10, r5, lr
|
||||
ldrb r10, [r0], r2
|
||||
smlad r8, r11, r5, r8
|
||||
ldrb r11, [r0]
|
||||
|
||||
sub r7, r7, #2
|
||||
sub r0, r0, r2, lsl #2
|
||||
|
||||
pkhbt r9, r9, r10, lsl #16 ; r10 | r9
|
||||
pkhbt r10, r10, r11, lsl #16 ; r11 | r10
|
||||
|
||||
smlad lr, r9, r6, lr
|
||||
smlad r10, r10, r6, r8
|
||||
|
||||
ands r9, r7, #0xff
|
||||
|
||||
add lr, lr, #0x40 ; round_shift_and_clamp
|
||||
ldrneb r8, [r0], r2 ; load data for next loop
|
||||
usat lr, #8, lr, asr #7
|
||||
add r10, r10, #0x40
|
||||
strb lr, [r1], r12 ; store the result for the column
|
||||
usat r10, #8, r10, asr #7
|
||||
|
||||
ldrneb r9, [r0], r2
|
||||
strb r10, [r1], r12
|
||||
ldrneb r10, [r0], r2
|
||||
|
||||
bne height_loop_2nd_only_6
|
||||
|
||||
ldr r0, [sp]
|
||||
ldr r1, [sp, #4]
|
||||
subs r7, r7, #0x10000
|
||||
add r0, r0, #1 ; move to filter next column
|
||||
str r0, [sp]
|
||||
add r1, r1, #1
|
||||
str r1, [sp, #4]
|
||||
|
||||
bne width_loop_2nd_only_6
|
||||
|
||||
add sp, sp, #8
|
||||
|
||||
ldmia sp!, {r4 - r11, pc}
|
||||
ENDP ; |vp8_filter_block2d_second_pass_only_armv6|
|
||||
|
||||
END
|
||||
@@ -1,115 +0,0 @@
|
||||
/*
|
||||
* Copyright (c) 2010 The WebM project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#include "vpx_config.h"
|
||||
#include "vp8_rtcd.h"
|
||||
|
||||
|
||||
void vp8_dequant_idct_add_y_block_v6(short *q, short *dq,
|
||||
unsigned char *dst,
|
||||
int stride, char *eobs)
|
||||
{
|
||||
int i;
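    /* eobs[] holds the end-of-block index for each 4x4 sub-block:
     * >1  -> run the full dequant + inverse transform,
     * ==1 -> only the DC coefficient is present, so take the DC-only
     *        path and then clear that coefficient,
     * 0   -> the sub-block is all zero and is skipped entirely. */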
|
||||
|
||||
for (i = 0; i < 4; i++)
|
||||
{
|
||||
if (eobs[0] > 1)
|
||||
vp8_dequant_idct_add_v6 (q, dq, dst, stride);
|
||||
else if (eobs[0] == 1)
|
||||
{
|
||||
vp8_dc_only_idct_add_v6 (q[0]*dq[0], dst, stride, dst, stride);
|
||||
((int *)q)[0] = 0;
|
||||
}
|
||||
|
||||
if (eobs[1] > 1)
|
||||
vp8_dequant_idct_add_v6 (q+16, dq, dst+4, stride);
|
||||
else if (eobs[1] == 1)
|
||||
{
|
||||
vp8_dc_only_idct_add_v6 (q[16]*dq[0], dst+4, stride, dst+4, stride);
|
||||
((int *)(q+16))[0] = 0;
|
||||
}
|
||||
|
||||
if (eobs[2] > 1)
|
||||
vp8_dequant_idct_add_v6 (q+32, dq, dst+8, stride);
|
||||
else if (eobs[2] == 1)
|
||||
{
|
||||
vp8_dc_only_idct_add_v6 (q[32]*dq[0], dst+8, stride, dst+8, stride);
|
||||
((int *)(q+32))[0] = 0;
|
||||
}
|
||||
|
||||
if (eobs[3] > 1)
|
||||
vp8_dequant_idct_add_v6 (q+48, dq, dst+12, stride);
|
||||
else if (eobs[3] == 1)
|
||||
{
|
||||
vp8_dc_only_idct_add_v6 (q[48]*dq[0], dst+12, stride,dst+12,stride);
|
||||
((int *)(q+48))[0] = 0;
|
||||
}
|
||||
|
||||
q += 64;
|
||||
dst += 4*stride;
|
||||
eobs += 4;
|
||||
}
|
||||
}
|
||||
|
||||
void vp8_dequant_idct_add_uv_block_v6(short *q, short *dq,
|
||||
unsigned char *dstu,
|
||||
unsigned char *dstv,
|
||||
int stride, char *eobs)
|
||||
{
|
||||
int i;
|
||||
|
||||
for (i = 0; i < 2; i++)
|
||||
{
|
||||
if (eobs[0] > 1)
|
||||
vp8_dequant_idct_add_v6 (q, dq, dstu, stride);
|
||||
else if (eobs[0] == 1)
|
||||
{
|
||||
vp8_dc_only_idct_add_v6 (q[0]*dq[0], dstu, stride, dstu, stride);
|
||||
((int *)q)[0] = 0;
|
||||
}
|
||||
|
||||
if (eobs[1] > 1)
|
||||
vp8_dequant_idct_add_v6 (q+16, dq, dstu+4, stride);
|
||||
else if (eobs[1] == 1)
|
||||
{
|
||||
vp8_dc_only_idct_add_v6 (q[16]*dq[0], dstu+4, stride,
|
||||
dstu+4, stride);
|
||||
((int *)(q+16))[0] = 0;
|
||||
}
|
||||
|
||||
q += 32;
|
||||
dstu += 4*stride;
|
||||
eobs += 2;
|
||||
}
|
||||
|
||||
for (i = 0; i < 2; i++)
|
||||
{
|
||||
if (eobs[0] > 1)
|
||||
vp8_dequant_idct_add_v6 (q, dq, dstv, stride);
|
||||
else if (eobs[0] == 1)
|
||||
{
|
||||
vp8_dc_only_idct_add_v6 (q[0]*dq[0], dstv, stride, dstv, stride);
|
||||
((int *)q)[0] = 0;
|
||||
}
|
||||
|
||||
if (eobs[1] > 1)
|
||||
vp8_dequant_idct_add_v6 (q+16, dq, dstv+4, stride);
|
||||
else if (eobs[1] == 1)
|
||||
{
|
||||
vp8_dc_only_idct_add_v6 (q[16]*dq[0], dstv+4, stride,
|
||||
dstv+4, stride);
|
||||
((int *)(q+16))[0] = 0;
|
||||
}
|
||||
|
||||
q += 32;
|
||||
dstv += 4*stride;
|
||||
eobs += 2;
|
||||
}
|
||||
}
|
||||
@@ -1,202 +0,0 @@
|
||||
;
|
||||
; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
|
||||
;
|
||||
; Use of this source code is governed by a BSD-style license
|
||||
; that can be found in the LICENSE file in the root of the source
|
||||
; tree. An additional intellectual property rights grant can be found
|
||||
; in the file PATENTS. All contributing project authors may
|
||||
; be found in the AUTHORS file in the root of the source tree.
|
||||
;
|
||||
|
||||
|
||||
EXPORT |vp8_short_idct4x4llm_v6_dual|
|
||||
|
||||
AREA |.text|, CODE, READONLY
|
||||
|
||||
|
||||
; void vp8_short_idct4x4llm_c(short *input, unsigned char *pred, int pitch,
|
||||
; unsigned char *dst, int stride)
|
||||
; r0 short* input
|
||||
; r1 unsigned char* pred
|
||||
; r2 int pitch
|
||||
; r3 unsigned char* dst
|
||||
; sp int stride
|
||||
|
||||
|vp8_short_idct4x4llm_v6_dual| PROC
|
||||
stmdb sp!, {r4-r11, lr}
|
||||
|
||||
sub sp, sp, #4
|
||||
|
||||
mov r4, #0x00008A00 ; sin
|
||||
orr r4, r4, #0x0000008C ; sinpi8sqrt2
|
||||
|
||||
mov r5, #0x00004E00 ; cos
|
||||
orr r5, r5, #0x0000007B ; cospi8sqrt2minus1
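    ; note on the two constants above: 0x8A8C = 35468 ~= sqrt(2)*sin(pi/8) in Q16,
    ; and 0x4E7B = 20091 ~= sqrt(2)*cos(pi/8) - 1 in Q16 (the usual VP8 IDCT values)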
|
||||
orr r5, r5, #1<<31 ; loop counter on top bit
|
||||
|
||||
loop1_dual
|
||||
ldr r6, [r0, #(4*2)] ; i5 | i4
|
||||
ldr r12, [r0, #(12*2)] ; i13|i12
|
||||
ldr r14, [r0, #(8*2)] ; i9 | i8
|
||||
|
||||
smulbt r9, r5, r6 ; (ip[5] * cospi8sqrt2minus1) >> 16
|
||||
smulbb r7, r5, r6 ; (ip[4] * cospi8sqrt2minus1) >> 16
|
||||
smulwt r10, r4, r6 ; (ip[5] * sinpi8sqrt2) >> 16
|
||||
smulwb r8, r4, r6 ; (ip[4] * sinpi8sqrt2) >> 16
|
||||
|
||||
smulbt r11, r5, r12 ; (ip[13] * cospi8sqrt2minus1) >> 16
|
||||
pkhtb r7, r9, r7, asr #16 ; 5c | 4c
|
||||
pkhbt r8, r8, r10, lsl #16 ; 5s | 4s
|
||||
uadd16 r6, r6, r7 ; 5c+5 | 4c+4
|
||||
|
||||
smulwt r7, r4, r12 ; (ip[13] * sinpi8sqrt2) >> 16
|
||||
smulbb r9, r5, r12 ; (ip[12] * cospi8sqrt2minus1) >> 16
|
||||
smulwb r10, r4, r12 ; (ip[12] * sinpi8sqrt2) >> 16
|
||||
|
||||
subs r5, r5, #1<<31 ; i--
|
||||
|
||||
pkhtb r9, r11, r9, asr #16 ; 13c | 12c
|
||||
ldr r11, [r0] ; i1 | i0
|
||||
pkhbt r10, r10, r7, lsl #16 ; 13s | 12s
|
||||
uadd16 r7, r12, r9 ; 13c+13 | 12c+12
|
||||
|
||||
usub16 r7, r8, r7 ; c
|
||||
uadd16 r6, r6, r10 ; d
|
||||
uadd16 r10, r11, r14 ; a
|
||||
usub16 r8, r11, r14 ; b
|
||||
|
||||
uadd16 r9, r10, r6 ; a+d
|
||||
usub16 r10, r10, r6 ; a-d
|
||||
uadd16 r6, r8, r7 ; b+c
|
||||
usub16 r7, r8, r7 ; b-c
|
||||
|
||||
; use input buffer to store intermediate results
|
||||
str r6, [r0, #(4*2)] ; o5 | o4
|
||||
str r7, [r0, #(8*2)] ; o9 | o8
|
||||
str r10,[r0, #(12*2)] ; o13|o12
|
||||
str r9, [r0], #4 ; o1 | o0
|
||||
|
||||
bcs loop1_dual
|
||||
|
||||
sub r0, r0, #8 ; reset input/output
|
||||
str r0, [sp]
|
||||
|
||||
loop2_dual
|
||||
|
||||
ldr r6, [r0, #(4*2)] ; i5 | i4
|
||||
ldr r12,[r0, #(2*2)] ; i3 | i2
|
||||
ldr r14,[r0, #(6*2)] ; i7 | i6
|
||||
ldr r0, [r0, #(0*2)] ; i1 | i0
|
||||
|
||||
smulbt r9, r5, r6 ; (ip[5] * cospi8sqrt2minus1) >> 16
|
||||
smulbt r7, r5, r0 ; (ip[1] * cospi8sqrt2minus1) >> 16
|
||||
smulwt r10, r4, r6 ; (ip[5] * sinpi8sqrt2) >> 16
|
||||
smulwt r8, r4, r0 ; (ip[1] * sinpi8sqrt2) >> 16
|
||||
|
||||
pkhbt r11, r6, r0, lsl #16 ; i0 | i4
|
||||
pkhtb r7, r7, r9, asr #16 ; 1c | 5c
|
||||
pkhtb r0, r0, r6, asr #16 ; i1 | i5
|
||||
pkhbt r8, r10, r8, lsl #16 ; 1s | 5s = temp1
|
||||
|
||||
uadd16 r0, r7, r0 ; 1c+1 | 5c+5 = temp2
|
||||
pkhbt r9, r14, r12, lsl #16 ; i2 | i6
|
||||
uadd16 r10, r11, r9 ; a
|
||||
usub16 r9, r11, r9 ; b
|
||||
pkhtb r6, r12, r14, asr #16 ; i3 | i7
|
||||
|
||||
subs r5, r5, #1<<31 ; i--
|
||||
|
||||
smulbt r7, r5, r6 ; (ip[3] * cospi8sqrt2minus1) >> 16
|
||||
smulwt r11, r4, r6 ; (ip[3] * sinpi8sqrt2) >> 16
|
||||
smulbb r12, r5, r6 ; (ip[7] * cospi8sqrt2minus1) >> 16
|
||||
smulwb r14, r4, r6 ; (ip[7] * sinpi8sqrt2) >> 16
|
||||
|
||||
pkhtb r7, r7, r12, asr #16 ; 3c | 7c
|
||||
pkhbt r11, r14, r11, lsl #16 ; 3s | 7s = temp1
|
||||
|
||||
uadd16 r6, r7, r6 ; 3c+3 | 7c+7 = temp2
|
||||
usub16 r12, r8, r6 ; c (o1 | o5)
|
||||
uadd16 r6, r11, r0 ; d (o3 | o7)
|
||||
uadd16 r7, r10, r6 ; a+d
|
||||
|
||||
mov r8, #4 ; set up 4's
|
||||
orr r8, r8, #0x40000 ; 4|4
|
||||
|
||||
usub16 r6, r10, r6 ; a-d
|
||||
uadd16 r6, r6, r8 ; a-d+4, 3|7
|
||||
uadd16 r7, r7, r8 ; a+d+4, 0|4
|
||||
uadd16 r10, r9, r12 ; b+c
|
||||
usub16 r0, r9, r12 ; b-c
|
||||
uadd16 r10, r10, r8 ; b+c+4, 1|5
|
||||
uadd16 r8, r0, r8 ; b-c+4, 2|6
|
||||
|
||||
ldr lr, [sp, #40] ; dst stride
|
||||
|
||||
ldrb r0, [r1] ; pred p0
|
||||
ldrb r11, [r1, #1] ; pred p1
|
||||
ldrb r12, [r1, #2] ; pred p2
|
||||
|
||||
add r0, r0, r7, asr #19 ; p0 + o0
|
||||
add r11, r11, r10, asr #19 ; p1 + o1
|
||||
add r12, r12, r8, asr #19 ; p2 + o2
|
||||
|
||||
usat r0, #8, r0 ; d0 = clip8(p0 + o0)
|
||||
usat r11, #8, r11 ; d1 = clip8(p1 + o1)
|
||||
usat r12, #8, r12 ; d2 = clip8(p2 + o2)
|
||||
|
||||
add r0, r0, r11, lsl #8 ; |--|--|d1|d0|
|
||||
|
||||
ldrb r11, [r1, #3] ; pred p3
|
||||
|
||||
add r0, r0, r12, lsl #16 ; |--|d2|d1|d0|
|
||||
|
||||
add r11, r11, r6, asr #19 ; p3 + o3
|
||||
|
||||
sxth r7, r7 ;
|
||||
sxth r10, r10 ;
|
||||
|
||||
usat r11, #8, r11 ; d3 = clip8(p3 + o3)
|
||||
|
||||
sxth r8, r8 ;
|
||||
sxth r6, r6 ;
|
||||
|
||||
add r0, r0, r11, lsl #24 ; |d3|d2|d1|d0|
|
||||
|
||||
ldrb r12, [r1, r2]! ; pred p4
|
||||
str r0, [r3], lr
|
||||
ldrb r11, [r1, #1] ; pred p5
|
||||
|
||||
add r12, r12, r7, asr #3 ; p4 + o4
|
||||
add r11, r11, r10, asr #3 ; p5 + o5
|
||||
|
||||
usat r12, #8, r12 ; d4 = clip8(p4 + o4)
|
||||
usat r11, #8, r11 ; d5 = clip8(p5 + o5)
|
||||
|
||||
ldrb r7, [r1, #2] ; pred p6
|
||||
ldrb r10, [r1, #3] ; pred p7
|
||||
|
||||
add r12, r12, r11, lsl #8 ; |--|--|d5|d4|
|
||||
|
||||
add r7, r7, r8, asr #3 ; p6 + o6
|
||||
add r10, r10, r6, asr #3 ; p7 + o7
|
||||
|
||||
ldr r0, [sp] ; load input pointer
|
||||
|
||||
usat r7, #8, r7 ; d6 = clip8(p6 + o6)
|
||||
usat r10, #8, r10 ; d7 = clip8(p7 + o7)
|
||||
|
||||
add r12, r12, r7, lsl #16 ; |--|d6|d5|d4|
|
||||
add r12, r12, r10, lsl #24 ; |d7|d6|d5|d4|
|
||||
|
||||
str r12, [r3], lr
|
||||
add r0, r0, #16
|
||||
add r1, r1, r2 ; pred + pitch
|
||||
|
||||
bcs loop2_dual
|
||||
|
||||
add sp, sp, #4 ; idct_output buffer
|
||||
ldmia sp!, {r4 - r11, pc}
|
||||
|
||||
ENDP
|
||||
|
||||
END
|
||||
@@ -1,136 +0,0 @@
|
||||
;
|
||||
; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
|
||||
;
|
||||
; Use of this source code is governed by a BSD-style license
|
||||
; that can be found in the LICENSE file in the root of the source
|
||||
; tree. An additional intellectual property rights grant can be found
|
||||
; in the file PATENTS. All contributing project authors may
|
||||
; be found in the AUTHORS file in the root of the source tree.
|
||||
;
|
||||
|
||||
EXPORT |vp8_short_inv_walsh4x4_v6|
|
||||
|
||||
ARM
|
||||
REQUIRE8
|
||||
PRESERVE8
|
||||
|
||||
AREA |.text|, CODE, READONLY ; name this block of code
|
||||
|
||||
;short vp8_short_inv_walsh4x4_v6(short *input, short *mb_dqcoeff)
|
||||
|vp8_short_inv_walsh4x4_v6| PROC
|
||||
|
||||
stmdb sp!, {r4 - r12, lr}
|
||||
|
||||
ldr r2, [r0, #0] ; [1 | 0]
|
||||
ldr r3, [r0, #4] ; [3 | 2]
|
||||
ldr r4, [r0, #8] ; [5 | 4]
|
||||
ldr r5, [r0, #12] ; [7 | 6]
|
||||
ldr r6, [r0, #16] ; [9 | 8]
|
||||
ldr r7, [r0, #20] ; [11 | 10]
|
||||
ldr r8, [r0, #24] ; [13 | 12]
|
||||
ldr r9, [r0, #28] ; [15 | 14]
|
||||
|
||||
qadd16 r10, r2, r8 ; a1 [1+13 | 0+12]
|
||||
qadd16 r11, r4, r6 ; b1 [5+9 | 4+8]
|
||||
qsub16 r12, r4, r6 ; c1 [5-9 | 4-8]
|
||||
qsub16 lr, r2, r8 ; d1 [1-13 | 0-12]
|
||||
|
||||
qadd16 r2, r10, r11 ; a1 + b1 [1 | 0]
|
||||
qadd16 r4, r12, lr ; c1 + d1 [5 | 4]
|
||||
qsub16 r6, r10, r11 ; a1 - b1 [9 | 8]
|
||||
qsub16 r8, lr, r12 ; d1 - c1 [13 | 12]
|
||||
|
||||
qadd16 r10, r3, r9 ; a1 [3+15 | 2+14]
|
||||
qadd16 r11, r5, r7 ; b1 [7+11 | 6+10]
|
||||
qsub16 r12, r5, r7 ; c1 [7-11 | 6-10]
|
||||
qsub16 lr, r3, r9 ; d1 [3-15 | 2-14]
|
||||
|
||||
qadd16 r3, r10, r11 ; a1 + b1 [3 | 2]
|
||||
qadd16 r5, r12, lr ; c1 + d1 [7 | 6]
|
||||
qsub16 r7, r10, r11 ; a1 - b1 [11 | 10]
|
||||
qsub16 r9, lr, r12 ; d1 - c1 [15 | 14]
|
||||
|
||||
; first transform complete
|
||||
|
||||
qsubaddx r10, r2, r3 ; [c1|a1] [1-2 | 0+3]
|
||||
qaddsubx r11, r2, r3 ; [b1|d1] [1+2 | 0-3]
|
||||
qsubaddx r12, r4, r5 ; [c1|a1] [5-6 | 4+7]
|
||||
qaddsubx lr, r4, r5 ; [b1|d1] [5+6 | 4-7]
|
||||
|
||||
qaddsubx r2, r10, r11 ; [b2|c2] [c1+d1 | a1-b1]
|
||||
qaddsubx r3, r11, r10 ; [a2|d2] [b1+a1 | d1-c1]
|
||||
ldr r10, c0x00030003
|
||||
qaddsubx r4, r12, lr ; [b2|c2] [c1+d1 | a1-b1]
|
||||
qaddsubx r5, lr, r12 ; [a2|d2] [b1+a1 | d1-c1]
|
||||
|
||||
qadd16 r2, r2, r10 ; [b2+3|c2+3]
|
||||
qadd16 r3, r3, r10 ; [a2+3|d2+3]
|
||||
qadd16 r4, r4, r10 ; [b2+3|c2+3]
|
||||
qadd16 r5, r5, r10 ; [a2+3|d2+3]
|
||||
|
||||
asr r12, r3, #19 ; [0]
|
||||
strh r12, [r1], #32
|
||||
asr lr, r2, #19 ; [1]
|
||||
strh lr, [r1], #32
|
||||
sxth r2, r2
|
||||
sxth r3, r3
|
||||
asr r2, r2, #3 ; [2]
|
||||
strh r2, [r1], #32
|
||||
asr r3, r3, #3 ; [3]
|
||||
strh r3, [r1], #32
|
||||
|
||||
asr r12, r5, #19 ; [4]
|
||||
strh r12, [r1], #32
|
||||
asr lr, r4, #19 ; [5]
|
||||
strh lr, [r1], #32
|
||||
sxth r4, r4
|
||||
sxth r5, r5
|
||||
asr r4, r4, #3 ; [6]
|
||||
strh r4, [r1], #32
|
||||
asr r5, r5, #3 ; [7]
|
||||
strh r5, [r1], #32
|
||||
|
||||
qsubaddx r2, r6, r7 ; [c1|a1] [9-10 | 8+11]
|
||||
qaddsubx r3, r6, r7 ; [b1|d1] [9+10 | 8-11]
|
||||
qsubaddx r4, r8, r9 ; [c1|a1] [13-14 | 12+15]
|
||||
qaddsubx r5, r8, r9 ; [b1|d1] [13+14 | 12-15]
|
||||
|
||||
qaddsubx r6, r2, r3 ; [b2|c2] [c1+d1 | a1-b1]
|
||||
qaddsubx r7, r3, r2 ; [a2|d2] [b1+a1 | d1-c1]
|
||||
qaddsubx r8, r4, r5 ; [b2|c2] [c1+d1 | a1-b1]
|
||||
qaddsubx r9, r5, r4 ; [a2|d2] [b1+a1 | d1-c1]
|
||||
|
||||
qadd16 r6, r6, r10 ; [b2+3|c2+3]
|
||||
qadd16 r7, r7, r10 ; [a2+3|d2+3]
|
||||
qadd16 r8, r8, r10 ; [b2+3|c2+3]
|
||||
qadd16 r9, r9, r10 ; [a2+3|d2+3]
|
||||
|
||||
asr r12, r7, #19 ; [8]
|
||||
strh r12, [r1], #32
|
||||
asr lr, r6, #19 ; [9]
|
||||
strh lr, [r1], #32
|
||||
sxth r6, r6
|
||||
sxth r7, r7
|
||||
asr r6, r6, #3 ; [10]
|
||||
strh r6, [r1], #32
|
||||
asr r7, r7, #3 ; [11]
|
||||
strh r7, [r1], #32
|
||||
|
||||
asr r12, r9, #19 ; [12]
|
||||
strh r12, [r1], #32
|
||||
asr lr, r8, #19 ; [13]
|
||||
strh lr, [r1], #32
|
||||
sxth r8, r8
|
||||
sxth r9, r9
|
||||
asr r8, r8, #3 ; [14]
|
||||
strh r8, [r1], #32
|
||||
asr r9, r9, #3 ; [15]
|
||||
strh r9, [r1], #32
|
||||
|
||||
ldmia sp!, {r4 - r12, pc}
|
||||
ENDP ; |vp8_short_inv_walsh4x4_v6|
|
||||
|
||||
|
||||
; Constant Pool
|
||||
c0x00030003 DCD 0x00030003
|
||||
END
|
||||
File diff suppressed because it is too large
@@ -1,286 +0,0 @@
|
||||
;
|
||||
; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
|
||||
;
|
||||
; Use of this source code is governed by a BSD-style license
|
||||
; that can be found in the LICENSE file in the root of the source
|
||||
; tree. An additional intellectual property rights grant can be found
|
||||
; in the file PATENTS. All contributing project authors may
|
||||
; be found in the AUTHORS file in the root of the source tree.
|
||||
;
|
||||
|
||||
|
||||
EXPORT |vp8_loop_filter_simple_horizontal_edge_armv6|
|
||||
EXPORT |vp8_loop_filter_simple_vertical_edge_armv6|
|
||||
|
||||
AREA |.text|, CODE, READONLY ; name this block of code
|
||||
|
||||
MACRO
|
||||
TRANSPOSE_MATRIX $a0, $a1, $a2, $a3, $b0, $b1, $b2, $b3
|
||||
; input: $a0, $a1, $a2, $a3; output: $b0, $b1, $b2, $b3
|
||||
; a0: 03 02 01 00
|
||||
; a1: 13 12 11 10
|
||||
; a2: 23 22 21 20
|
||||
; a3: 33 32 31 30
|
||||
; b3 b2 b1 b0
|
||||
|
||||
uxtb16 $b1, $a1 ; xx 12 xx 10
|
||||
uxtb16 $b0, $a0 ; xx 02 xx 00
|
||||
uxtb16 $b3, $a3 ; xx 32 xx 30
|
||||
uxtb16 $b2, $a2 ; xx 22 xx 20
|
||||
orr $b1, $b0, $b1, lsl #8 ; 12 02 10 00
|
||||
orr $b3, $b2, $b3, lsl #8 ; 32 22 30 20
|
||||
|
||||
uxtb16 $a1, $a1, ror #8 ; xx 13 xx 11
|
||||
uxtb16 $a3, $a3, ror #8 ; xx 33 xx 31
|
||||
uxtb16 $a0, $a0, ror #8 ; xx 03 xx 01
|
||||
uxtb16 $a2, $a2, ror #8 ; xx 23 xx 21
|
||||
orr $a0, $a0, $a1, lsl #8 ; 13 03 11 01
|
||||
orr $a2, $a2, $a3, lsl #8 ; 33 23 31 21
|
||||
|
||||
pkhtb $b2, $b3, $b1, asr #16 ; 32 22 12 02 -- p1
|
||||
pkhbt $b0, $b1, $b3, lsl #16 ; 30 20 10 00 -- p3
|
||||
|
||||
pkhtb $b3, $a2, $a0, asr #16 ; 33 23 13 03 -- p0
|
||||
pkhbt $b1, $a0, $a2, lsl #16 ; 31 21 11 01 -- p2
|
||||
MEND
|
||||
|
||||
|
||||
|
||||
src RN r0
|
||||
pstep RN r1
|
||||
|
||||
;r0 unsigned char *src_ptr,
|
||||
;r1 int src_pixel_step,
|
||||
;r2 const char *blimit
|
||||
|
||||
;-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-
|
||||
|vp8_loop_filter_simple_horizontal_edge_armv6| PROC
|
||||
;-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-
|
||||
stmdb sp!, {r4 - r11, lr}
|
||||
|
||||
ldrb r12, [r2] ; blimit
|
||||
ldr r3, [src, -pstep, lsl #1] ; p1
|
||||
ldr r4, [src, -pstep] ; p0
|
||||
ldr r5, [src] ; q0
|
||||
ldr r6, [src, pstep] ; q1
|
||||
orr r12, r12, r12, lsl #8 ; blimit
|
||||
ldr r2, c0x80808080
|
||||
orr r12, r12, r12, lsl #16 ; blimit
|
||||
mov r9, #4 ; double the count. we're doing 4 at a time
|
||||
mov lr, #0 ; need 0 in a couple places
|
||||
|
||||
|simple_hnext8|
|
||||
; vp8_simple_filter_mask()
|
||||
|
||||
uqsub8 r7, r3, r6 ; p1 - q1
|
||||
uqsub8 r8, r6, r3 ; q1 - p1
|
||||
uqsub8 r10, r4, r5 ; p0 - q0
|
||||
uqsub8 r11, r5, r4 ; q0 - p0
|
||||
orr r8, r8, r7 ; abs(p1 - q1)
|
||||
orr r10, r10, r11 ; abs(p0 - q0)
|
||||
uqadd8 r10, r10, r10 ; abs(p0 - q0) * 2
|
||||
uhadd8 r8, r8, lr ; abs(p1 - q1) >> 1
|
||||
uqadd8 r10, r10, r8 ; abs(p0 - q0)*2 + abs(p1 - q1)/2
|
||||
mvn r8, #0
|
||||
usub8 r10, r12, r10 ; compare to flimit. usub8 sets GE flags
|
||||
sel r10, r8, lr ; filter mask: F or 0
|
||||
cmp r10, #0
|
||||
beq simple_hskip_filter ; skip filtering if all masks are 0x00
|
||||
|
||||
;vp8_simple_filter()
|
||||
|
||||
eor r3, r3, r2 ; p1 offset to convert to a signed value
|
||||
eor r6, r6, r2 ; q1 offset to convert to a signed value
|
||||
eor r4, r4, r2 ; p0 offset to convert to a signed value
|
||||
eor r5, r5, r2 ; q0 offset to convert to a signed value
|
||||
|
||||
qsub8 r3, r3, r6 ; vp8_filter = p1 - q1
|
||||
qsub8 r6, r5, r4 ; q0 - p0
|
||||
qadd8 r3, r3, r6 ; += q0 - p0
|
||||
ldr r7, c0x04040404
|
||||
qadd8 r3, r3, r6 ; += q0 - p0
|
||||
ldr r8, c0x03030303
|
||||
qadd8 r3, r3, r6 ; vp8_filter = p1-q1 + 3*(q0-p0))
|
||||
;STALL
|
||||
and r3, r3, r10 ; vp8_filter &= mask
|
||||
|
||||
qadd8 r7 , r3 , r7 ; Filter1 = vp8_filter + 4
|
||||
qadd8 r8 , r3 , r8 ; Filter2 = vp8_filter + 3
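    ; lr is zero here, so each shadd8 below is a per-byte-lane signed halving;
    ; three of them in a row give an arithmetic shift right by 3 without
    ; carries leaking between lanes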
|
||||
|
||||
shadd8 r7 , r7 , lr
|
||||
shadd8 r8 , r8 , lr
|
||||
shadd8 r7 , r7 , lr
|
||||
shadd8 r8 , r8 , lr
|
||||
shadd8 r7 , r7 , lr ; Filter1 >>= 3
|
||||
shadd8 r8 , r8 , lr ; Filter2 >>= 3
|
||||
|
||||
qsub8 r5 ,r5, r7 ; u = q0 - Filter1
|
||||
qadd8 r4, r4, r8 ; u = p0 + Filter2
|
||||
eor r5, r5, r2 ; *oq0 = u^0x80
|
||||
str r5, [src] ; store oq0 result
|
||||
eor r4, r4, r2 ; *op0 = u^0x80
|
||||
str r4, [src, -pstep] ; store op0 result
|
||||
|
||||
|simple_hskip_filter|
|
||||
subs r9, r9, #1
|
||||
addne src, src, #4 ; next row
|
||||
|
||||
ldrne r3, [src, -pstep, lsl #1] ; p1
|
||||
ldrne r4, [src, -pstep] ; p0
|
||||
ldrne r5, [src] ; q0
|
||||
ldrne r6, [src, pstep] ; q1
|
||||
|
||||
bne simple_hnext8
|
||||
|
||||
ldmia sp!, {r4 - r11, pc}
|
||||
ENDP ; |vp8_loop_filter_simple_horizontal_edge_armv6|
|
||||
|
||||
|
||||
;-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-
|
||||
|vp8_loop_filter_simple_vertical_edge_armv6| PROC
|
||||
;-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-
|
||||
stmdb sp!, {r4 - r11, lr}
|
||||
|
||||
ldrb r12, [r2] ; r12: blimit
|
||||
ldr r2, c0x80808080
|
||||
orr r12, r12, r12, lsl #8
|
||||
|
||||
; load source data to r7, r8, r9, r10
|
||||
ldrh r3, [src, #-2]
|
||||
pld [src, #23] ; preload for next block
|
||||
ldrh r4, [src], pstep
|
||||
orr r12, r12, r12, lsl #16
|
||||
|
||||
ldrh r5, [src, #-2]
|
||||
pld [src, #23]
|
||||
ldrh r6, [src], pstep
|
||||
|
||||
pkhbt r7, r3, r4, lsl #16
|
||||
|
||||
ldrh r3, [src, #-2]
|
||||
pld [src, #23]
|
||||
ldrh r4, [src], pstep
|
||||
|
||||
pkhbt r8, r5, r6, lsl #16
|
||||
|
||||
ldrh r5, [src, #-2]
|
||||
pld [src, #23]
|
||||
ldrh r6, [src], pstep
|
||||
mov r11, #4 ; double the count. we're doing 4 at a time
|
||||
|
||||
|simple_vnext8|
|
||||
; vp8_simple_filter_mask() function
|
||||
pkhbt r9, r3, r4, lsl #16
|
||||
pkhbt r10, r5, r6, lsl #16
|
||||
|
||||
;transpose r7, r8, r9, r10 to r3, r4, r5, r6
|
||||
TRANSPOSE_MATRIX r7, r8, r9, r10, r3, r4, r5, r6
|
||||
|
||||
uqsub8 r7, r3, r6 ; p1 - q1
|
||||
uqsub8 r8, r6, r3 ; q1 - p1
|
||||
uqsub8 r9, r4, r5 ; p0 - q0
|
||||
uqsub8 r10, r5, r4 ; q0 - p0
|
||||
orr r7, r7, r8 ; abs(p1 - q1)
|
||||
orr r9, r9, r10 ; abs(p0 - q0)
|
||||
mov r8, #0
|
||||
uqadd8 r9, r9, r9 ; abs(p0 - q0) * 2
|
||||
uhadd8 r7, r7, r8 ; abs(p1 - q1) / 2
|
||||
uqadd8 r7, r7, r9 ; abs(p0 - q0)*2 + abs(p1 - q1)/2
|
||||
mvn r10, #0 ; r10 == -1
|
||||
|
||||
usub8 r7, r12, r7 ; compare to flimit
|
||||
sel lr, r10, r8 ; filter mask
|
||||
|
||||
cmp lr, #0
|
||||
beq simple_vskip_filter ; skip filtering
|
||||
|
||||
;vp8_simple_filter() function
|
||||
eor r3, r3, r2 ; p1 offset to convert to a signed value
|
||||
eor r6, r6, r2 ; q1 offset to convert to a signed value
|
||||
eor r4, r4, r2 ; p0 offset to convert to a signed value
|
||||
eor r5, r5, r2 ; q0 offset to convert to a signed value
|
||||
|
||||
qsub8 r3, r3, r6 ; vp8_filter = p1 - q1
|
||||
qsub8 r6, r5, r4 ; q0 - p0
|
||||
|
||||
qadd8 r3, r3, r6 ; vp8_filter += q0 - p0
|
||||
ldr r9, c0x03030303 ; r9 = 3
|
||||
|
||||
qadd8 r3, r3, r6 ; vp8_filter += q0 - p0
|
||||
ldr r7, c0x04040404
|
||||
|
||||
qadd8 r3, r3, r6 ; vp8_filter = p1-q1 + 3*(q0-p0))
|
||||
;STALL
|
||||
and r3, r3, lr ; vp8_filter &= mask
|
||||
|
||||
qadd8 r9 , r3 , r9 ; Filter2 = vp8_filter + 3
|
||||
qadd8 r3 , r3 , r7 ; Filter1 = vp8_filter + 4
|
||||
|
||||
shadd8 r9 , r9 , r8
|
||||
shadd8 r3 , r3 , r8
|
||||
shadd8 r9 , r9 , r8
|
||||
shadd8 r3 , r3 , r8
|
||||
shadd8 r9 , r9 , r8 ; Filter2 >>= 3
|
||||
shadd8 r3 , r3 , r8 ; Filter1 >>= 3
|
||||
|
||||
;calculate output
|
||||
sub src, src, pstep, lsl #2
|
||||
|
||||
qadd8 r4, r4, r9 ; u = p0 + Filter2
|
||||
qsub8 r5, r5, r3 ; u = q0 - Filter1
|
||||
eor r4, r4, r2 ; *op0 = u^0x80
|
||||
eor r5, r5, r2 ; *oq0 = u^0x80
|
||||
|
||||
strb r4, [src, #-1] ; store the result
|
||||
mov r4, r4, lsr #8
|
||||
strb r5, [src], pstep
|
||||
mov r5, r5, lsr #8
|
||||
|
||||
strb r4, [src, #-1]
|
||||
mov r4, r4, lsr #8
|
||||
strb r5, [src], pstep
|
||||
mov r5, r5, lsr #8
|
||||
|
||||
strb r4, [src, #-1]
|
||||
mov r4, r4, lsr #8
|
||||
strb r5, [src], pstep
|
||||
mov r5, r5, lsr #8
|
||||
|
||||
strb r4, [src, #-1]
|
||||
strb r5, [src], pstep
|
||||
|
||||
|simple_vskip_filter|
|
||||
subs r11, r11, #1
|
||||
|
||||
; load source data to r7, r8, r9, r10
|
||||
ldrneh r3, [src, #-2]
|
||||
pld [src, #23] ; preload for next block
|
||||
ldrneh r4, [src], pstep
|
||||
|
||||
ldrneh r5, [src, #-2]
|
||||
pld [src, #23]
|
||||
ldrneh r6, [src], pstep
|
||||
|
||||
pkhbt r7, r3, r4, lsl #16
|
||||
|
||||
ldrneh r3, [src, #-2]
|
||||
pld [src, #23]
|
||||
ldrneh r4, [src], pstep
|
||||
|
||||
pkhbt r8, r5, r6, lsl #16
|
||||
|
||||
ldrneh r5, [src, #-2]
|
||||
pld [src, #23]
|
||||
ldrneh r6, [src], pstep
|
||||
|
||||
bne simple_vnext8
|
||||
|
||||
ldmia sp!, {r4 - r11, pc}
|
||||
ENDP ; |vp8_loop_filter_simple_vertical_edge_armv6|
|
||||
|
||||
; Constant Pool
|
||||
c0x80808080 DCD 0x80808080
|
||||
c0x03030303 DCD 0x03030303
|
||||
c0x04040404 DCD 0x04040404
|
||||
|
||||
END
|
||||
@@ -1,273 +0,0 @@
|
||||
;
|
||||
; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
|
||||
;
|
||||
; Use of this source code is governed by a BSD-style license
|
||||
; that can be found in the LICENSE file in the root of the source
|
||||
; tree. An additional intellectual property rights grant can be found
|
||||
; in the file PATENTS. All contributing project authors may
|
||||
; be found in the AUTHORS file in the root of the source tree.
|
||||
;
|
||||
|
||||
|
||||
EXPORT |vp8_sixtap_predict8x4_armv6|
|
||||
|
||||
AREA |.text|, CODE, READONLY ; name this block of code
|
||||
;-------------------------------------
|
||||
; r0 unsigned char *src_ptr,
|
||||
; r1 int src_pixels_per_line,
|
||||
; r2 int xoffset,
|
||||
; r3 int yoffset,
|
||||
; stack unsigned char *dst_ptr,
|
||||
; stack int dst_pitch
|
||||
;-------------------------------------
|
||||
;note: the first pass stores its result transposed (8 lines x 9 columns) on the stack; temporary stack size is 184 bytes.
|
||||
;Line width is 20 bytes: 9 shorts plus 2 bytes of padding for 4-byte alignment. The second pass loads the data back from the stack,
|
||||
;and its result is transposed back as it is stored.
|
||||
|vp8_sixtap_predict8x4_armv6| PROC
|
||||
stmdb sp!, {r4 - r11, lr}
|
||||
str r3, [sp, #-184]! ;reserve space on stack for temporary storage, store yoffset
|
||||
|
||||
cmp r2, #0 ;skip first_pass filter if xoffset=0
|
||||
add lr, sp, #4 ;point to temporary buffer
|
||||
beq skip_firstpass_filter
|
||||
|
||||
;first-pass filter
|
||||
adr r12, filter8_coeff
|
||||
sub r0, r0, r1, lsl #1
|
||||
|
||||
add r3, r1, #10 ; preload next row
|
||||
pld [r0, r3]
|
||||
|
||||
add r2, r12, r2, lsl #4 ;calculate filter location
|
||||
add r0, r0, #3 ;adjust src only for loading convenience
|
||||
|
||||
ldr r3, [r2] ; load up packed filter coefficients
|
||||
ldr r4, [r2, #4]
|
||||
ldr r5, [r2, #8]
|
||||
|
||||
mov r2, #0x90000 ; height=9 is top part of counter
|
||||
|
||||
sub r1, r1, #8
|
||||
|
||||
|first_pass_hloop_v6|
|
||||
ldrb r6, [r0, #-5] ; load source data
|
||||
ldrb r7, [r0, #-4]
|
||||
ldrb r8, [r0, #-3]
|
||||
ldrb r9, [r0, #-2]
|
||||
ldrb r10, [r0, #-1]
|
||||
|
||||
orr r2, r2, #0x4 ; construct loop counter. width=8=4x2
|
||||
|
||||
pkhbt r6, r6, r7, lsl #16 ; r7 | r6
|
||||
pkhbt r7, r7, r8, lsl #16 ; r8 | r7
|
||||
|
||||
pkhbt r8, r8, r9, lsl #16 ; r9 | r8
|
||||
pkhbt r9, r9, r10, lsl #16 ; r10 | r9
|
||||
|
||||
|first_pass_wloop_v6|
|
||||
smuad r11, r6, r3 ; vp8_filter[0], vp8_filter[1]
|
||||
smuad r12, r7, r3
|
||||
|
||||
ldrb r6, [r0], #1
|
||||
|
||||
smlad r11, r8, r4, r11 ; vp8_filter[2], vp8_filter[3]
|
||||
ldrb r7, [r0], #1
|
||||
smlad r12, r9, r4, r12
|
||||
|
||||
pkhbt r10, r10, r6, lsl #16 ; r10 | r9
|
||||
pkhbt r6, r6, r7, lsl #16 ; r11 | r10
|
||||
smlad r11, r10, r5, r11 ; vp8_filter[4], vp8_filter[5]
|
||||
smlad r12, r6, r5, r12
|
||||
|
||||
sub r2, r2, #1
|
||||
|
||||
add r11, r11, #0x40 ; round_shift_and_clamp
|
||||
tst r2, #0xff ; test loop counter
|
||||
usat r11, #8, r11, asr #7
|
||||
add r12, r12, #0x40
|
||||
strh r11, [lr], #20 ; result is transposed and stored, which
|
||||
usat r12, #8, r12, asr #7
|
||||
|
||||
strh r12, [lr], #20
|
||||
|
||||
movne r11, r6
|
||||
movne r12, r7
|
||||
|
||||
movne r6, r8
|
||||
movne r7, r9
|
||||
movne r8, r10
|
||||
movne r9, r11
|
||||
movne r10, r12
|
||||
|
||||
bne first_pass_wloop_v6
|
||||
|
||||
;;add r9, ppl, #30 ; attempt to load 2 adjacent cache lines
|
||||
;;IF ARCHITECTURE=6
|
||||
;pld [src, ppl]
|
||||
;;pld [src, r9]
|
||||
;;ENDIF
|
||||
|
||||
subs r2, r2, #0x10000
|
||||
|
||||
sub lr, lr, #158
|
||||
|
||||
add r0, r0, r1 ; move to next input line
|
||||
|
||||
add r11, r1, #18 ; preload next row. adding back block width(=8), which was subtracted earlier
|
||||
pld [r0, r11]
|
||||
|
||||
bne first_pass_hloop_v6
|
||||
|
||||
;second pass filter
|
||||
secondpass_filter
|
||||
ldr r3, [sp], #4 ; load back yoffset
|
||||
ldr r0, [sp, #216] ; load dst address from stack 180+36
|
||||
ldr r1, [sp, #220] ; load dst stride from stack 180+40
|
||||
|
||||
cmp r3, #0
|
||||
beq skip_secondpass_filter
|
||||
|
||||
adr r12, filter8_coeff
|
||||
add lr, r12, r3, lsl #4 ;calculate filter location
|
||||
|
||||
mov r2, #0x00080000
|
||||
|
||||
ldr r3, [lr] ; load up packed filter coefficients
|
||||
ldr r4, [lr, #4]
|
||||
ldr r5, [lr, #8]
|
||||
|
||||
pkhbt r12, r4, r3 ; pack the filter differently
|
||||
pkhbt r11, r5, r4
|
||||
|
||||
second_pass_hloop_v6
|
||||
ldr r6, [sp] ; load the data
|
||||
ldr r7, [sp, #4]
|
||||
|
||||
orr r2, r2, #2 ; loop counter
|
||||
|
||||
second_pass_wloop_v6
|
||||
smuad lr, r3, r6 ; apply filter
|
||||
smulbt r10, r3, r6
|
||||
|
||||
ldr r8, [sp, #8]
|
||||
|
||||
smlad lr, r4, r7, lr
|
||||
smladx r10, r12, r7, r10
|
||||
|
||||
ldrh r9, [sp, #12]
|
||||
|
||||
smlad lr, r5, r8, lr
|
||||
smladx r10, r11, r8, r10
|
||||
|
||||
add sp, sp, #4
|
||||
smlatb r10, r5, r9, r10
|
||||
|
||||
sub r2, r2, #1
|
||||
|
||||
add lr, lr, #0x40 ; round_shift_and_clamp
|
||||
tst r2, #0xff
|
||||
usat lr, #8, lr, asr #7
|
||||
add r10, r10, #0x40
|
||||
strb lr, [r0], r1 ; the result is transposed back and stored
|
||||
usat r10, #8, r10, asr #7
|
||||
|
||||
strb r10, [r0],r1
|
||||
|
||||
movne r6, r7
|
||||
movne r7, r8
|
||||
|
||||
bne second_pass_wloop_v6
|
||||
|
||||
subs r2, r2, #0x10000
|
||||
add sp, sp, #12 ; update src for next loop (20-8)
|
||||
sub r0, r0, r1, lsl #2
|
||||
add r0, r0, #1
|
||||
|
||||
bne second_pass_hloop_v6
|
||||
|
||||
add sp, sp, #20
|
||||
ldmia sp!, {r4 - r11, pc}
|
||||
|
||||
;--------------------
|
||||
skip_firstpass_filter
|
||||
sub r0, r0, r1, lsl #1
|
||||
sub r1, r1, #8
|
||||
mov r2, #9
|
||||
|
||||
skip_firstpass_hloop
|
||||
ldrb r4, [r0], #1 ; load data
|
||||
subs r2, r2, #1
|
||||
ldrb r5, [r0], #1
|
||||
strh r4, [lr], #20 ; store it to immediate buffer
|
||||
ldrb r6, [r0], #1 ; load data
|
||||
strh r5, [lr], #20
|
||||
ldrb r7, [r0], #1
|
||||
strh r6, [lr], #20
|
||||
ldrb r8, [r0], #1
|
||||
strh r7, [lr], #20
|
||||
ldrb r9, [r0], #1
|
||||
strh r8, [lr], #20
|
||||
ldrb r10, [r0], #1
|
||||
strh r9, [lr], #20
|
||||
ldrb r11, [r0], #1
|
||||
strh r10, [lr], #20
|
||||
add r0, r0, r1 ; move to next input line
|
||||
strh r11, [lr], #20
|
||||
|
||||
sub lr, lr, #158 ; move over to next column
|
||||
bne skip_firstpass_hloop
|
||||
|
||||
b secondpass_filter
|
||||
|
||||
;--------------------
|
||||
skip_secondpass_filter
|
||||
mov r2, #8
|
||||
add sp, sp, #4 ;start from src[0] instead of src[-2]
|
||||
|
||||
skip_secondpass_hloop
|
||||
ldr r6, [sp], #4
|
||||
subs r2, r2, #1
|
||||
ldr r8, [sp], #4
|
||||
|
||||
mov r7, r6, lsr #16 ; unpack
|
||||
strb r6, [r0], r1
|
||||
mov r9, r8, lsr #16
|
||||
strb r7, [r0], r1
|
||||
add sp, sp, #12 ; 20-8
|
||||
strb r8, [r0], r1
|
||||
strb r9, [r0], r1
|
||||
|
||||
sub r0, r0, r1, lsl #2
|
||||
add r0, r0, #1
|
||||
|
||||
bne skip_secondpass_hloop
|
||||
|
||||
add sp, sp, #16 ; 180 - (160 +4)
|
||||
|
||||
ldmia sp!, {r4 - r11, pc}
|
||||
|
||||
ENDP
|
||||
|
||||
;-----------------
|
||||
;One word each is reserved. The label filter8_coeff below can be used to access the data.
|
||||
;Data address: filter8_coeff, filter8_coeff+4, filter8_coeff+8 ...
|
||||
filter8_coeff
|
||||
DCD 0x00000000, 0x00000080, 0x00000000, 0x00000000
|
||||
DCD 0xfffa0000, 0x000c007b, 0x0000ffff, 0x00000000
|
||||
DCD 0xfff50002, 0x0024006c, 0x0001fff8, 0x00000000
|
||||
DCD 0xfff70000, 0x0032005d, 0x0000fffa, 0x00000000
|
||||
DCD 0xfff00003, 0x004d004d, 0x0003fff0, 0x00000000
|
||||
DCD 0xfffa0000, 0x005d0032, 0x0000fff7, 0x00000000
|
||||
DCD 0xfff80001, 0x006c0024, 0x0002fff5, 0x00000000
|
||||
DCD 0xffff0000, 0x007b000c, 0x0000fffa, 0x00000000
|
||||
|
||||
;DCD 0, 0, 128, 0, 0, 0
|
||||
;DCD 0, -6, 123, 12, -1, 0
|
||||
;DCD 2, -11, 108, 36, -8, 1
|
||||
;DCD 0, -9, 93, 50, -6, 0
|
||||
;DCD 3, -16, 77, 77, -16, 3
|
||||
;DCD 0, -6, 50, 93, -9, 0
|
||||
;DCD 1, -8, 36, 108, -11, 2
|
||||
;DCD 0, -1, 12, 123, -6, 0
|
||||
|
||||
END
|
||||
@@ -1,113 +0,0 @@
|
||||
/*
|
||||
* Copyright (c) 2010 The WebM project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#include "vpx_config.h"
|
||||
#include "vp8_rtcd.h"
|
||||
#include <math.h>
|
||||
#include "vp8/common/filter.h"
|
||||
#include "bilinearfilter_arm.h"
|
||||
|
||||
void vp8_filter_block2d_bil_armv6
|
||||
(
|
||||
unsigned char *src_ptr,
|
||||
unsigned char *dst_ptr,
|
||||
unsigned int src_pitch,
|
||||
unsigned int dst_pitch,
|
||||
const short *HFilter,
|
||||
const short *VFilter,
|
||||
int Width,
|
||||
int Height
|
||||
)
|
||||
{
|
||||
unsigned short FData[36*16]; /* Temp data buffer used in filtering */
|
||||
|
||||
/* First filter 1-D horizontally... */
|
||||
vp8_filter_block2d_bil_first_pass_armv6(src_ptr, FData, src_pitch, Height + 1, Width, HFilter);
|
||||
|
||||
/* then 1-D vertically... */
|
||||
vp8_filter_block2d_bil_second_pass_armv6(FData, dst_ptr, dst_pitch, Height, Width, VFilter);
|
||||
}
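/*
 * For reference while reading the wrapper above: a minimal plain-C sketch of
 * the same two-pass (separable) bilinear filter.  The function name is
 * hypothetical, and the 7-bit filter convention (taps summing to 128, +64
 * rounding, >>7) is an assumption carried over from the surrounding code,
 * not an extract of the removed assembly.
 */
static void bilinear_2d_sketch(const unsigned char *src, int src_pitch,
                               unsigned char *dst, int dst_pitch,
                               const short *hf, const short *vf,
                               int width, int height)
{
    unsigned short tmp[36 * 16]; /* intermediate rows, as in FData above */
    int r, c;

    /* first pass: 2-tap horizontal filter, producing height + 1 rows */
    for (r = 0; r < height + 1; r++)
        for (c = 0; c < width; c++)
            tmp[r * width + c] = (unsigned short)
                ((src[r * src_pitch + c] * hf[0] +
                  src[r * src_pitch + c + 1] * hf[1] + 64) >> 7);

    /* second pass: 2-tap vertical filter over the intermediate rows */
    for (r = 0; r < height; r++)
        for (c = 0; c < width; c++)
            dst[r * dst_pitch + c] = (unsigned char)
                ((tmp[r * width + c] * vf[0] +
                  tmp[(r + 1) * width + c] * vf[1] + 64) >> 7);
}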
|
||||
|
||||
|
||||
void vp8_bilinear_predict4x4_armv6
|
||||
(
|
||||
unsigned char *src_ptr,
|
||||
int src_pixels_per_line,
|
||||
int xoffset,
|
||||
int yoffset,
|
||||
unsigned char *dst_ptr,
|
||||
int dst_pitch
|
||||
)
|
||||
{
|
||||
const short *HFilter;
|
||||
const short *VFilter;
|
||||
|
||||
HFilter = vp8_bilinear_filters[xoffset];
|
||||
VFilter = vp8_bilinear_filters[yoffset];
|
||||
|
||||
vp8_filter_block2d_bil_armv6(src_ptr, dst_ptr, src_pixels_per_line, dst_pitch, HFilter, VFilter, 4, 4);
|
||||
}
|
||||
|
||||
void vp8_bilinear_predict8x8_armv6
|
||||
(
|
||||
unsigned char *src_ptr,
|
||||
int src_pixels_per_line,
|
||||
int xoffset,
|
||||
int yoffset,
|
||||
unsigned char *dst_ptr,
|
||||
int dst_pitch
|
||||
)
|
||||
{
|
||||
const short *HFilter;
|
||||
const short *VFilter;
|
||||
|
||||
HFilter = vp8_bilinear_filters[xoffset];
|
||||
VFilter = vp8_bilinear_filters[yoffset];
|
||||
|
||||
vp8_filter_block2d_bil_armv6(src_ptr, dst_ptr, src_pixels_per_line, dst_pitch, HFilter, VFilter, 8, 8);
|
||||
}
|
||||
|
||||
void vp8_bilinear_predict8x4_armv6
|
||||
(
|
||||
unsigned char *src_ptr,
|
||||
int src_pixels_per_line,
|
||||
int xoffset,
|
||||
int yoffset,
|
||||
unsigned char *dst_ptr,
|
||||
int dst_pitch
|
||||
)
|
||||
{
|
||||
const short *HFilter;
|
||||
const short *VFilter;
|
||||
|
||||
HFilter = vp8_bilinear_filters[xoffset];
|
||||
VFilter = vp8_bilinear_filters[yoffset];
|
||||
|
||||
vp8_filter_block2d_bil_armv6(src_ptr, dst_ptr, src_pixels_per_line, dst_pitch, HFilter, VFilter, 8, 4);
|
||||
}
|
||||
|
||||
void vp8_bilinear_predict16x16_armv6
|
||||
(
|
||||
unsigned char *src_ptr,
|
||||
int src_pixels_per_line,
|
||||
int xoffset,
|
||||
int yoffset,
|
||||
unsigned char *dst_ptr,
|
||||
int dst_pitch
|
||||
)
|
||||
{
|
||||
const short *HFilter;
|
||||
const short *VFilter;
|
||||
|
||||
HFilter = vp8_bilinear_filters[xoffset];
|
||||
VFilter = vp8_bilinear_filters[yoffset];
|
||||
|
||||
vp8_filter_block2d_bil_armv6(src_ptr, dst_ptr, src_pixels_per_line, dst_pitch, HFilter, VFilter, 16, 16);
|
||||
}
|
||||
@@ -1,43 +0,0 @@
|
||||
/*
|
||||
* Copyright (c) 2011 The WebM project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
|
||||
#ifndef VP8_COMMON_ARM_BILINEARFILTER_ARM_H_
|
||||
#define VP8_COMMON_ARM_BILINEARFILTER_ARM_H_
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
extern void vp8_filter_block2d_bil_first_pass_armv6
|
||||
(
|
||||
const unsigned char *src_ptr,
|
||||
unsigned short *dst_ptr,
|
||||
unsigned int src_pitch,
|
||||
unsigned int height,
|
||||
unsigned int width,
|
||||
const short *vp8_filter
|
||||
);
|
||||
|
||||
extern void vp8_filter_block2d_bil_second_pass_armv6
|
||||
(
|
||||
const unsigned short *src_ptr,
|
||||
unsigned char *dst_ptr,
|
||||
int dst_pitch,
|
||||
unsigned int height,
|
||||
unsigned int width,
|
||||
const short *vp8_filter
|
||||
);
|
||||
|
||||
#ifdef __cplusplus
|
||||
} // extern "C"
|
||||
#endif
|
||||
|
||||
#endif // VP8_COMMON_ARM_BILINEARFILTER_ARM_H_
|
||||
@@ -1,25 +0,0 @@
|
||||
/*
|
||||
* Copyright (c) 2010 The WebM project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
|
||||
#include "vpx_config.h"
|
||||
#include "vp8/common/blockd.h"
|
||||
|
||||
#if HAVE_MEDIA
|
||||
extern void vp8_dequantize_b_loop_v6(short *Q, short *DQC, short *DQ);
|
||||
|
||||
void vp8_dequantize_b_v6(BLOCKD *d, short *DQC)
|
||||
{
|
||||
short *DQ = d->dqcoeff;
|
||||
short *Q = d->qcoeff;
|
||||
|
||||
vp8_dequantize_b_loop_v6(Q, DQC, DQ);
|
||||
}
|
||||
#endif
|
||||
@@ -1,221 +0,0 @@
|
||||
/*
|
||||
* Copyright (c) 2010 The WebM project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
|
||||
#include "vpx_config.h"
|
||||
#include "vp8_rtcd.h"
|
||||
#include <math.h>
|
||||
#include "vp8/common/filter.h"
|
||||
#include "vpx_ports/mem.h"
|
||||
|
||||
extern void vp8_filter_block2d_first_pass_armv6
|
||||
(
|
||||
unsigned char *src_ptr,
|
||||
short *output_ptr,
|
||||
unsigned int src_pixels_per_line,
|
||||
unsigned int output_width,
|
||||
unsigned int output_height,
|
||||
const short *vp8_filter
|
||||
);
|
||||
|
||||
// 8x8
|
||||
extern void vp8_filter_block2d_first_pass_8x8_armv6
|
||||
(
|
||||
unsigned char *src_ptr,
|
||||
short *output_ptr,
|
||||
unsigned int src_pixels_per_line,
|
||||
unsigned int output_width,
|
||||
unsigned int output_height,
|
||||
const short *vp8_filter
|
||||
);
|
||||
|
||||
// 16x16
|
||||
extern void vp8_filter_block2d_first_pass_16x16_armv6
|
||||
(
|
||||
unsigned char *src_ptr,
|
||||
short *output_ptr,
|
||||
unsigned int src_pixels_per_line,
|
||||
unsigned int output_width,
|
||||
unsigned int output_height,
|
||||
const short *vp8_filter
|
||||
);
|
||||
|
||||
extern void vp8_filter_block2d_second_pass_armv6
|
||||
(
|
||||
short *src_ptr,
|
||||
unsigned char *output_ptr,
|
||||
unsigned int output_pitch,
|
||||
unsigned int cnt,
|
||||
const short *vp8_filter
|
||||
);
|
||||
|
||||
extern void vp8_filter4_block2d_second_pass_armv6
|
||||
(
|
||||
short *src_ptr,
|
||||
unsigned char *output_ptr,
|
||||
unsigned int output_pitch,
|
||||
unsigned int cnt,
|
||||
const short *vp8_filter
|
||||
);
|
||||
|
||||
extern void vp8_filter_block2d_first_pass_only_armv6
|
||||
(
|
||||
unsigned char *src_ptr,
|
||||
unsigned char *output_ptr,
|
||||
unsigned int src_pixels_per_line,
|
||||
unsigned int cnt,
|
||||
unsigned int output_pitch,
|
||||
const short *vp8_filter
|
||||
);
|
||||
|
||||
|
||||
extern void vp8_filter_block2d_second_pass_only_armv6
|
||||
(
|
||||
unsigned char *src_ptr,
|
||||
unsigned char *output_ptr,
|
||||
unsigned int src_pixels_per_line,
|
||||
unsigned int cnt,
|
||||
unsigned int output_pitch,
|
||||
const short *vp8_filter
|
||||
);
|
||||
|
||||
#if HAVE_MEDIA
|
||||
void vp8_sixtap_predict4x4_armv6
|
||||
(
|
||||
unsigned char *src_ptr,
|
||||
int src_pixels_per_line,
|
||||
int xoffset,
|
||||
int yoffset,
|
||||
unsigned char *dst_ptr,
|
||||
int dst_pitch
|
||||
)
|
||||
{
|
||||
const short *HFilter;
|
||||
const short *VFilter;
|
||||
DECLARE_ALIGNED(4, short, FData[12*4]); /* Temp data buffer used in filtering */
|
||||
|
||||
|
||||
HFilter = vp8_sub_pel_filters[xoffset]; /* 6 tap */
|
||||
VFilter = vp8_sub_pel_filters[yoffset]; /* 6 tap */
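    /* The 6-tap filter is applied separably: the first pass filters rows
       horizontally into FData, the second pass filters FData columns
       vertically into dst_ptr.  Producing 4 output rows needs 4 + 5 = 9
       source rows when the vertical filter is 6-tap (2 above, 3 below) and
       4 + 3 = 7 rows when it reduces to 4 taps, which is why the first-pass
       calls below use heights 9 and 7. */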
|
||||
|
||||
/* Vfilter is null. First pass only */
|
||||
if (xoffset && !yoffset)
|
||||
{
|
||||
/*vp8_filter_block2d_first_pass_armv6 ( src_ptr, FData+2, src_pixels_per_line, 4, 4, HFilter );
|
||||
vp8_filter_block2d_second_pass_armv6 ( FData+2, dst_ptr, dst_pitch, 4, VFilter );*/
|
||||
|
||||
vp8_filter_block2d_first_pass_only_armv6(src_ptr, dst_ptr, src_pixels_per_line, 4, dst_pitch, HFilter);
|
||||
}
|
||||
/* Hfilter is null. Second pass only */
|
||||
else if (!xoffset && yoffset)
|
||||
{
|
||||
vp8_filter_block2d_second_pass_only_armv6(src_ptr, dst_ptr, src_pixels_per_line, 4, dst_pitch, VFilter);
|
||||
}
|
||||
else
|
||||
{
    /* Vfilter is a 4 tap filter */
    if (yoffset & 0x1)
    {
      vp8_filter_block2d_first_pass_armv6(src_ptr - src_pixels_per_line, FData + 1, src_pixels_per_line, 4, 7, HFilter);
      vp8_filter4_block2d_second_pass_armv6(FData + 2, dst_ptr, dst_pitch, 4, VFilter);
    }
    /* Vfilter is 6 tap filter */
    else
    {
      vp8_filter_block2d_first_pass_armv6(src_ptr - (2 * src_pixels_per_line), FData, src_pixels_per_line, 4, 9, HFilter);
      vp8_filter_block2d_second_pass_armv6(FData + 2, dst_ptr, dst_pitch, 4, VFilter);
    }
  }
}

void vp8_sixtap_predict8x8_armv6
(
  unsigned char *src_ptr,
  int src_pixels_per_line,
  int xoffset,
  int yoffset,
  unsigned char *dst_ptr,
  int dst_pitch
)
{
  const short *HFilter;
  const short *VFilter;
  DECLARE_ALIGNED(4, short, FData[16*8]); /* Temp data buffer used in filtering */

  HFilter = vp8_sub_pel_filters[xoffset]; /* 6 tap */
  VFilter = vp8_sub_pel_filters[yoffset]; /* 6 tap */

  if (xoffset && !yoffset)
  {
    vp8_filter_block2d_first_pass_only_armv6(src_ptr, dst_ptr, src_pixels_per_line, 8, dst_pitch, HFilter);
  }
  /* Hfilter is null. Second pass only */
  else if (!xoffset && yoffset)
  {
    vp8_filter_block2d_second_pass_only_armv6(src_ptr, dst_ptr, src_pixels_per_line, 8, dst_pitch, VFilter);
  }
  else
  {
    if (yoffset & 0x1)
    {
      vp8_filter_block2d_first_pass_8x8_armv6(src_ptr - src_pixels_per_line, FData + 1, src_pixels_per_line, 8, 11, HFilter);
      vp8_filter4_block2d_second_pass_armv6(FData + 2, dst_ptr, dst_pitch, 8, VFilter);
    }
    else
    {
      vp8_filter_block2d_first_pass_8x8_armv6(src_ptr - (2 * src_pixels_per_line), FData, src_pixels_per_line, 8, 13, HFilter);
      vp8_filter_block2d_second_pass_armv6(FData + 2, dst_ptr, dst_pitch, 8, VFilter);
    }
  }
}


void vp8_sixtap_predict16x16_armv6
(
  unsigned char *src_ptr,
  int src_pixels_per_line,
  int xoffset,
  int yoffset,
  unsigned char *dst_ptr,
  int dst_pitch
)
{
  const short *HFilter;
  const short *VFilter;
  DECLARE_ALIGNED(4, short, FData[24*16]); /* Temp data buffer used in filtering */

  HFilter = vp8_sub_pel_filters[xoffset]; /* 6 tap */
  VFilter = vp8_sub_pel_filters[yoffset]; /* 6 tap */

  if (xoffset && !yoffset)
  {
    vp8_filter_block2d_first_pass_only_armv6(src_ptr, dst_ptr, src_pixels_per_line, 16, dst_pitch, HFilter);
  }
  /* Hfilter is null. Second pass only */
  else if (!xoffset && yoffset)
  {
    vp8_filter_block2d_second_pass_only_armv6(src_ptr, dst_ptr, src_pixels_per_line, 16, dst_pitch, VFilter);
  }
  else
  {
    if (yoffset & 0x1)
    {
      vp8_filter_block2d_first_pass_16x16_armv6(src_ptr - src_pixels_per_line, FData + 1, src_pixels_per_line, 16, 19, HFilter);
      vp8_filter4_block2d_second_pass_armv6(FData + 2, dst_ptr, dst_pitch, 16, VFilter);
    }
    else
    {
      vp8_filter_block2d_first_pass_16x16_armv6(src_ptr - (2 * src_pixels_per_line), FData, src_pixels_per_line, 16, 21, HFilter);
      vp8_filter_block2d_second_pass_armv6(FData + 2, dst_ptr, dst_pitch, 16, VFilter);
    }
  }

}
#endif
@@ -1,181 +0,0 @@
/*
 * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
 *
 * Use of this source code is governed by a BSD-style license
 * that can be found in the LICENSE file in the root of the source
 * tree. An additional intellectual property rights grant can be found
 * in the file PATENTS. All contributing project authors may
 * be found in the AUTHORS file in the root of the source tree.
 */


#include "vpx_config.h"
#include "vp8_rtcd.h"
#include "vp8/common/loopfilter.h"
#include "vp8/common/onyxc_int.h"

#define prototype_loopfilter(sym) \
  void sym(unsigned char *src, int pitch, const unsigned char *blimit,\
           const unsigned char *limit, const unsigned char *thresh, int count)

#if HAVE_MEDIA
extern prototype_loopfilter(vp8_loop_filter_horizontal_edge_armv6);
extern prototype_loopfilter(vp8_loop_filter_vertical_edge_armv6);
extern prototype_loopfilter(vp8_mbloop_filter_horizontal_edge_armv6);
extern prototype_loopfilter(vp8_mbloop_filter_vertical_edge_armv6);
#endif

#if HAVE_NEON
typedef void loopfilter_y_neon(unsigned char *src, int pitch,
    unsigned char blimit, unsigned char limit, unsigned char thresh);
typedef void loopfilter_uv_neon(unsigned char *u, int pitch,
    unsigned char blimit, unsigned char limit, unsigned char thresh,
    unsigned char *v);

extern loopfilter_y_neon vp8_loop_filter_horizontal_edge_y_neon;
extern loopfilter_y_neon vp8_loop_filter_vertical_edge_y_neon;
extern loopfilter_uv_neon vp8_loop_filter_horizontal_edge_uv_neon;
extern loopfilter_uv_neon vp8_loop_filter_vertical_edge_uv_neon;

extern loopfilter_y_neon vp8_mbloop_filter_horizontal_edge_y_neon;
extern loopfilter_y_neon vp8_mbloop_filter_vertical_edge_y_neon;
extern loopfilter_uv_neon vp8_mbloop_filter_horizontal_edge_uv_neon;
extern loopfilter_uv_neon vp8_mbloop_filter_vertical_edge_uv_neon;
#endif

#if HAVE_MEDIA
/* ARMV6/MEDIA loopfilter functions*/
/* Horizontal MB filtering */
void vp8_loop_filter_mbh_armv6(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr,
                               int y_stride, int uv_stride, loop_filter_info *lfi)
{
  vp8_mbloop_filter_horizontal_edge_armv6(y_ptr, y_stride, lfi->mblim, lfi->lim, lfi->hev_thr, 2);

  if (u_ptr)
    vp8_mbloop_filter_horizontal_edge_armv6(u_ptr, uv_stride, lfi->mblim, lfi->lim, lfi->hev_thr, 1);

  if (v_ptr)
    vp8_mbloop_filter_horizontal_edge_armv6(v_ptr, uv_stride, lfi->mblim, lfi->lim, lfi->hev_thr, 1);
}

/* Vertical MB Filtering */
void vp8_loop_filter_mbv_armv6(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr,
                               int y_stride, int uv_stride, loop_filter_info *lfi)
{
  vp8_mbloop_filter_vertical_edge_armv6(y_ptr, y_stride, lfi->mblim, lfi->lim, lfi->hev_thr, 2);

  if (u_ptr)
    vp8_mbloop_filter_vertical_edge_armv6(u_ptr, uv_stride, lfi->mblim, lfi->lim, lfi->hev_thr, 1);

  if (v_ptr)
    vp8_mbloop_filter_vertical_edge_armv6(v_ptr, uv_stride, lfi->mblim, lfi->lim, lfi->hev_thr, 1);
}

/* Horizontal B Filtering */
void vp8_loop_filter_bh_armv6(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr,
                              int y_stride, int uv_stride, loop_filter_info *lfi)
{
  vp8_loop_filter_horizontal_edge_armv6(y_ptr + 4 * y_stride, y_stride, lfi->blim, lfi->lim, lfi->hev_thr, 2);
  vp8_loop_filter_horizontal_edge_armv6(y_ptr + 8 * y_stride, y_stride, lfi->blim, lfi->lim, lfi->hev_thr, 2);
  vp8_loop_filter_horizontal_edge_armv6(y_ptr + 12 * y_stride, y_stride, lfi->blim, lfi->lim, lfi->hev_thr, 2);

  if (u_ptr)
    vp8_loop_filter_horizontal_edge_armv6(u_ptr + 4 * uv_stride, uv_stride, lfi->blim, lfi->lim, lfi->hev_thr, 1);

  if (v_ptr)
    vp8_loop_filter_horizontal_edge_armv6(v_ptr + 4 * uv_stride, uv_stride, lfi->blim, lfi->lim, lfi->hev_thr, 1);
}

void vp8_loop_filter_bhs_armv6(unsigned char *y_ptr, int y_stride,
                               const unsigned char *blimit)
{
  vp8_loop_filter_simple_horizontal_edge_armv6(y_ptr + 4 * y_stride, y_stride, blimit);
  vp8_loop_filter_simple_horizontal_edge_armv6(y_ptr + 8 * y_stride, y_stride, blimit);
  vp8_loop_filter_simple_horizontal_edge_armv6(y_ptr + 12 * y_stride, y_stride, blimit);
}

/* Vertical B Filtering */
void vp8_loop_filter_bv_armv6(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr,
                              int y_stride, int uv_stride, loop_filter_info *lfi)
{
  vp8_loop_filter_vertical_edge_armv6(y_ptr + 4, y_stride, lfi->blim, lfi->lim, lfi->hev_thr, 2);
  vp8_loop_filter_vertical_edge_armv6(y_ptr + 8, y_stride, lfi->blim, lfi->lim, lfi->hev_thr, 2);
  vp8_loop_filter_vertical_edge_armv6(y_ptr + 12, y_stride, lfi->blim, lfi->lim, lfi->hev_thr, 2);

  if (u_ptr)
    vp8_loop_filter_vertical_edge_armv6(u_ptr + 4, uv_stride, lfi->blim, lfi->lim, lfi->hev_thr, 1);

  if (v_ptr)
    vp8_loop_filter_vertical_edge_armv6(v_ptr + 4, uv_stride, lfi->blim, lfi->lim, lfi->hev_thr, 1);
}

void vp8_loop_filter_bvs_armv6(unsigned char *y_ptr, int y_stride,
                               const unsigned char *blimit)
{
  vp8_loop_filter_simple_vertical_edge_armv6(y_ptr + 4, y_stride, blimit);
  vp8_loop_filter_simple_vertical_edge_armv6(y_ptr + 8, y_stride, blimit);
  vp8_loop_filter_simple_vertical_edge_armv6(y_ptr + 12, y_stride, blimit);
}
#endif

#if HAVE_NEON
/* NEON loopfilter functions */
/* Horizontal MB filtering */
void vp8_loop_filter_mbh_neon(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr,
                              int y_stride, int uv_stride, loop_filter_info *lfi)
{
  unsigned char mblim = *lfi->mblim;
  unsigned char lim = *lfi->lim;
  unsigned char hev_thr = *lfi->hev_thr;
  vp8_mbloop_filter_horizontal_edge_y_neon(y_ptr, y_stride, mblim, lim, hev_thr);

  if (u_ptr)
    vp8_mbloop_filter_horizontal_edge_uv_neon(u_ptr, uv_stride, mblim, lim, hev_thr, v_ptr);
}

/* Vertical MB Filtering */
void vp8_loop_filter_mbv_neon(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr,
                              int y_stride, int uv_stride, loop_filter_info *lfi)
{
  unsigned char mblim = *lfi->mblim;
  unsigned char lim = *lfi->lim;
  unsigned char hev_thr = *lfi->hev_thr;

  vp8_mbloop_filter_vertical_edge_y_neon(y_ptr, y_stride, mblim, lim, hev_thr);

  if (u_ptr)
    vp8_mbloop_filter_vertical_edge_uv_neon(u_ptr, uv_stride, mblim, lim, hev_thr, v_ptr);
}

/* Horizontal B Filtering */
void vp8_loop_filter_bh_neon(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr,
                             int y_stride, int uv_stride, loop_filter_info *lfi)
{
  unsigned char blim = *lfi->blim;
  unsigned char lim = *lfi->lim;
  unsigned char hev_thr = *lfi->hev_thr;

  vp8_loop_filter_horizontal_edge_y_neon(y_ptr + 4 * y_stride, y_stride, blim, lim, hev_thr);
  vp8_loop_filter_horizontal_edge_y_neon(y_ptr + 8 * y_stride, y_stride, blim, lim, hev_thr);
  vp8_loop_filter_horizontal_edge_y_neon(y_ptr + 12 * y_stride, y_stride, blim, lim, hev_thr);

  if (u_ptr)
    vp8_loop_filter_horizontal_edge_uv_neon(u_ptr + 4 * uv_stride, uv_stride, blim, lim, hev_thr, v_ptr + 4 * uv_stride);
}

/* Vertical B Filtering */
void vp8_loop_filter_bv_neon(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr,
                             int y_stride, int uv_stride, loop_filter_info *lfi)
{
  unsigned char blim = *lfi->blim;
  unsigned char lim = *lfi->lim;
  unsigned char hev_thr = *lfi->hev_thr;

  vp8_loop_filter_vertical_edge_y_neon(y_ptr + 4, y_stride, blim, lim, hev_thr);
  vp8_loop_filter_vertical_edge_y_neon(y_ptr + 8, y_stride, blim, lim, hev_thr);
  vp8_loop_filter_vertical_edge_y_neon(y_ptr + 12, y_stride, blim, lim, hev_thr);

  if (u_ptr)
    vp8_loop_filter_vertical_edge_uv_neon(u_ptr + 4, uv_stride, blim, lim, hev_thr, v_ptr + 4);
}
#endif
@@ -1,591 +0,0 @@
/*
 * Copyright (c) 2014 The WebM project authors. All Rights Reserved.
 *
 * Use of this source code is governed by a BSD-style license
 * that can be found in the LICENSE file in the root of the source
 * tree. An additional intellectual property rights grant can be found
 * in the file PATENTS. All contributing project authors may
 * be found in the AUTHORS file in the root of the source tree.
 */

#include <arm_neon.h>

static const uint8_t bifilter4_coeff[8][2] = {
  {128,   0},
  {112,  16},
  { 96,  32},
  { 80,  48},
  { 64,  64},
  { 48,  80},
  { 32,  96},
  { 16, 112}
};
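/* Added note: each pair of bilinear taps sums to 128, so the
 * vqrshrn_n_u16(..., 7) steps below divide the accumulated filter sum by 128
 * with rounding while narrowing back to 8 bits. */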

void vp8_bilinear_predict8x4_neon(
    unsigned char *src_ptr,
    int src_pixels_per_line,
    int xoffset,
    int yoffset,
    unsigned char *dst_ptr,
    int dst_pitch) {
  uint8x8_t d0u8, d1u8, d2u8, d3u8, d4u8, d5u8;
  uint8x8_t d7u8, d9u8, d11u8, d22u8, d23u8, d24u8, d25u8, d26u8;
  uint8x16_t q1u8, q2u8, q3u8, q4u8, q5u8;
  uint16x8_t q1u16, q2u16, q3u16, q4u16;
  uint16x8_t q6u16, q7u16, q8u16, q9u16, q10u16;

  if (xoffset == 0) {  // skip_1stpass_filter
    d22u8 = vld1_u8(src_ptr); src_ptr += src_pixels_per_line;
    d23u8 = vld1_u8(src_ptr); src_ptr += src_pixels_per_line;
    d24u8 = vld1_u8(src_ptr); src_ptr += src_pixels_per_line;
    d25u8 = vld1_u8(src_ptr); src_ptr += src_pixels_per_line;
    d26u8 = vld1_u8(src_ptr);
  } else {
    q1u8 = vld1q_u8(src_ptr); src_ptr += src_pixels_per_line;
    q2u8 = vld1q_u8(src_ptr); src_ptr += src_pixels_per_line;
    q3u8 = vld1q_u8(src_ptr); src_ptr += src_pixels_per_line;
    q4u8 = vld1q_u8(src_ptr); src_ptr += src_pixels_per_line;
    q5u8 = vld1q_u8(src_ptr);

    d0u8 = vdup_n_u8(bifilter4_coeff[xoffset][0]);
    d1u8 = vdup_n_u8(bifilter4_coeff[xoffset][1]);

    q6u16 = vmull_u8(vget_low_u8(q1u8), d0u8);
    q7u16 = vmull_u8(vget_low_u8(q2u8), d0u8);
    q8u16 = vmull_u8(vget_low_u8(q3u8), d0u8);
    q9u16 = vmull_u8(vget_low_u8(q4u8), d0u8);
    q10u16 = vmull_u8(vget_low_u8(q5u8), d0u8);

    d3u8 = vext_u8(vget_low_u8(q1u8), vget_high_u8(q1u8), 1);
    d5u8 = vext_u8(vget_low_u8(q2u8), vget_high_u8(q2u8), 1);
    d7u8 = vext_u8(vget_low_u8(q3u8), vget_high_u8(q3u8), 1);
    d9u8 = vext_u8(vget_low_u8(q4u8), vget_high_u8(q4u8), 1);
    d11u8 = vext_u8(vget_low_u8(q5u8), vget_high_u8(q5u8), 1);

    q6u16 = vmlal_u8(q6u16, d3u8, d1u8);
    q7u16 = vmlal_u8(q7u16, d5u8, d1u8);
    q8u16 = vmlal_u8(q8u16, d7u8, d1u8);
    q9u16 = vmlal_u8(q9u16, d9u8, d1u8);
    q10u16 = vmlal_u8(q10u16, d11u8, d1u8);

    d22u8 = vqrshrn_n_u16(q6u16, 7);
    d23u8 = vqrshrn_n_u16(q7u16, 7);
    d24u8 = vqrshrn_n_u16(q8u16, 7);
    d25u8 = vqrshrn_n_u16(q9u16, 7);
    d26u8 = vqrshrn_n_u16(q10u16, 7);
  }

  // secondpass_filter
  if (yoffset == 0) {  // skip_2ndpass_filter
    vst1_u8((uint8_t *)dst_ptr, d22u8); dst_ptr += dst_pitch;
    vst1_u8((uint8_t *)dst_ptr, d23u8); dst_ptr += dst_pitch;
    vst1_u8((uint8_t *)dst_ptr, d24u8); dst_ptr += dst_pitch;
    vst1_u8((uint8_t *)dst_ptr, d25u8);
  } else {
    d0u8 = vdup_n_u8(bifilter4_coeff[yoffset][0]);
    d1u8 = vdup_n_u8(bifilter4_coeff[yoffset][1]);

    q1u16 = vmull_u8(d22u8, d0u8);
    q2u16 = vmull_u8(d23u8, d0u8);
    q3u16 = vmull_u8(d24u8, d0u8);
    q4u16 = vmull_u8(d25u8, d0u8);

    q1u16 = vmlal_u8(q1u16, d23u8, d1u8);
    q2u16 = vmlal_u8(q2u16, d24u8, d1u8);
    q3u16 = vmlal_u8(q3u16, d25u8, d1u8);
    q4u16 = vmlal_u8(q4u16, d26u8, d1u8);

    d2u8 = vqrshrn_n_u16(q1u16, 7);
    d3u8 = vqrshrn_n_u16(q2u16, 7);
    d4u8 = vqrshrn_n_u16(q3u16, 7);
    d5u8 = vqrshrn_n_u16(q4u16, 7);

    vst1_u8((uint8_t *)dst_ptr, d2u8); dst_ptr += dst_pitch;
    vst1_u8((uint8_t *)dst_ptr, d3u8); dst_ptr += dst_pitch;
    vst1_u8((uint8_t *)dst_ptr, d4u8); dst_ptr += dst_pitch;
    vst1_u8((uint8_t *)dst_ptr, d5u8);
  }
  return;
}

void vp8_bilinear_predict8x8_neon(
    unsigned char *src_ptr,
    int src_pixels_per_line,
    int xoffset,
    int yoffset,
    unsigned char *dst_ptr,
    int dst_pitch) {
  uint8x8_t d0u8, d1u8, d2u8, d3u8, d4u8, d5u8, d6u8, d7u8, d8u8, d9u8, d11u8;
  uint8x8_t d22u8, d23u8, d24u8, d25u8, d26u8, d27u8, d28u8, d29u8, d30u8;
  uint8x16_t q1u8, q2u8, q3u8, q4u8, q5u8;
  uint16x8_t q1u16, q2u16, q3u16, q4u16, q5u16;
  uint16x8_t q6u16, q7u16, q8u16, q9u16, q10u16;

  if (xoffset == 0) {  // skip_1stpass_filter
    d22u8 = vld1_u8(src_ptr); src_ptr += src_pixels_per_line;
    d23u8 = vld1_u8(src_ptr); src_ptr += src_pixels_per_line;
    d24u8 = vld1_u8(src_ptr); src_ptr += src_pixels_per_line;
    d25u8 = vld1_u8(src_ptr); src_ptr += src_pixels_per_line;
    d26u8 = vld1_u8(src_ptr); src_ptr += src_pixels_per_line;
    d27u8 = vld1_u8(src_ptr); src_ptr += src_pixels_per_line;
    d28u8 = vld1_u8(src_ptr); src_ptr += src_pixels_per_line;
    d29u8 = vld1_u8(src_ptr); src_ptr += src_pixels_per_line;
    d30u8 = vld1_u8(src_ptr);
  } else {
    q1u8 = vld1q_u8(src_ptr); src_ptr += src_pixels_per_line;
    q2u8 = vld1q_u8(src_ptr); src_ptr += src_pixels_per_line;
    q3u8 = vld1q_u8(src_ptr); src_ptr += src_pixels_per_line;
    q4u8 = vld1q_u8(src_ptr); src_ptr += src_pixels_per_line;

    d0u8 = vdup_n_u8(bifilter4_coeff[xoffset][0]);
    d1u8 = vdup_n_u8(bifilter4_coeff[xoffset][1]);

    q6u16 = vmull_u8(vget_low_u8(q1u8), d0u8);
    q7u16 = vmull_u8(vget_low_u8(q2u8), d0u8);
    q8u16 = vmull_u8(vget_low_u8(q3u8), d0u8);
    q9u16 = vmull_u8(vget_low_u8(q4u8), d0u8);

    d3u8 = vext_u8(vget_low_u8(q1u8), vget_high_u8(q1u8), 1);
    d5u8 = vext_u8(vget_low_u8(q2u8), vget_high_u8(q2u8), 1);
    d7u8 = vext_u8(vget_low_u8(q3u8), vget_high_u8(q3u8), 1);
    d9u8 = vext_u8(vget_low_u8(q4u8), vget_high_u8(q4u8), 1);

    q6u16 = vmlal_u8(q6u16, d3u8, d1u8);
    q7u16 = vmlal_u8(q7u16, d5u8, d1u8);
    q8u16 = vmlal_u8(q8u16, d7u8, d1u8);
    q9u16 = vmlal_u8(q9u16, d9u8, d1u8);

    d22u8 = vqrshrn_n_u16(q6u16, 7);
    d23u8 = vqrshrn_n_u16(q7u16, 7);
    d24u8 = vqrshrn_n_u16(q8u16, 7);
    d25u8 = vqrshrn_n_u16(q9u16, 7);

    // first_pass filtering on the rest 5-line data
    q1u8 = vld1q_u8(src_ptr); src_ptr += src_pixels_per_line;
    q2u8 = vld1q_u8(src_ptr); src_ptr += src_pixels_per_line;
    q3u8 = vld1q_u8(src_ptr); src_ptr += src_pixels_per_line;
    q4u8 = vld1q_u8(src_ptr); src_ptr += src_pixels_per_line;
    q5u8 = vld1q_u8(src_ptr);

    q6u16 = vmull_u8(vget_low_u8(q1u8), d0u8);
    q7u16 = vmull_u8(vget_low_u8(q2u8), d0u8);
    q8u16 = vmull_u8(vget_low_u8(q3u8), d0u8);
    q9u16 = vmull_u8(vget_low_u8(q4u8), d0u8);
    q10u16 = vmull_u8(vget_low_u8(q5u8), d0u8);

    d3u8 = vext_u8(vget_low_u8(q1u8), vget_high_u8(q1u8), 1);
    d5u8 = vext_u8(vget_low_u8(q2u8), vget_high_u8(q2u8), 1);
    d7u8 = vext_u8(vget_low_u8(q3u8), vget_high_u8(q3u8), 1);
    d9u8 = vext_u8(vget_low_u8(q4u8), vget_high_u8(q4u8), 1);
    d11u8 = vext_u8(vget_low_u8(q5u8), vget_high_u8(q5u8), 1);

    q6u16 = vmlal_u8(q6u16, d3u8, d1u8);
    q7u16 = vmlal_u8(q7u16, d5u8, d1u8);
    q8u16 = vmlal_u8(q8u16, d7u8, d1u8);
    q9u16 = vmlal_u8(q9u16, d9u8, d1u8);
    q10u16 = vmlal_u8(q10u16, d11u8, d1u8);

    d26u8 = vqrshrn_n_u16(q6u16, 7);
    d27u8 = vqrshrn_n_u16(q7u16, 7);
    d28u8 = vqrshrn_n_u16(q8u16, 7);
    d29u8 = vqrshrn_n_u16(q9u16, 7);
    d30u8 = vqrshrn_n_u16(q10u16, 7);
  }

  // secondpass_filter
  if (yoffset == 0) {  // skip_2ndpass_filter
    vst1_u8((uint8_t *)dst_ptr, d22u8); dst_ptr += dst_pitch;
    vst1_u8((uint8_t *)dst_ptr, d23u8); dst_ptr += dst_pitch;
    vst1_u8((uint8_t *)dst_ptr, d24u8); dst_ptr += dst_pitch;
    vst1_u8((uint8_t *)dst_ptr, d25u8); dst_ptr += dst_pitch;
    vst1_u8((uint8_t *)dst_ptr, d26u8); dst_ptr += dst_pitch;
    vst1_u8((uint8_t *)dst_ptr, d27u8); dst_ptr += dst_pitch;
    vst1_u8((uint8_t *)dst_ptr, d28u8); dst_ptr += dst_pitch;
    vst1_u8((uint8_t *)dst_ptr, d29u8);
  } else {
    d0u8 = vdup_n_u8(bifilter4_coeff[yoffset][0]);
    d1u8 = vdup_n_u8(bifilter4_coeff[yoffset][1]);

    q1u16 = vmull_u8(d22u8, d0u8);
    q2u16 = vmull_u8(d23u8, d0u8);
    q3u16 = vmull_u8(d24u8, d0u8);
    q4u16 = vmull_u8(d25u8, d0u8);
    q5u16 = vmull_u8(d26u8, d0u8);
    q6u16 = vmull_u8(d27u8, d0u8);
    q7u16 = vmull_u8(d28u8, d0u8);
    q8u16 = vmull_u8(d29u8, d0u8);

    q1u16 = vmlal_u8(q1u16, d23u8, d1u8);
    q2u16 = vmlal_u8(q2u16, d24u8, d1u8);
    q3u16 = vmlal_u8(q3u16, d25u8, d1u8);
    q4u16 = vmlal_u8(q4u16, d26u8, d1u8);
    q5u16 = vmlal_u8(q5u16, d27u8, d1u8);
    q6u16 = vmlal_u8(q6u16, d28u8, d1u8);
    q7u16 = vmlal_u8(q7u16, d29u8, d1u8);
    q8u16 = vmlal_u8(q8u16, d30u8, d1u8);

    d2u8 = vqrshrn_n_u16(q1u16, 7);
    d3u8 = vqrshrn_n_u16(q2u16, 7);
    d4u8 = vqrshrn_n_u16(q3u16, 7);
    d5u8 = vqrshrn_n_u16(q4u16, 7);
    d6u8 = vqrshrn_n_u16(q5u16, 7);
    d7u8 = vqrshrn_n_u16(q6u16, 7);
    d8u8 = vqrshrn_n_u16(q7u16, 7);
    d9u8 = vqrshrn_n_u16(q8u16, 7);

    vst1_u8((uint8_t *)dst_ptr, d2u8); dst_ptr += dst_pitch;
    vst1_u8((uint8_t *)dst_ptr, d3u8); dst_ptr += dst_pitch;
    vst1_u8((uint8_t *)dst_ptr, d4u8); dst_ptr += dst_pitch;
    vst1_u8((uint8_t *)dst_ptr, d5u8); dst_ptr += dst_pitch;
    vst1_u8((uint8_t *)dst_ptr, d6u8); dst_ptr += dst_pitch;
    vst1_u8((uint8_t *)dst_ptr, d7u8); dst_ptr += dst_pitch;
    vst1_u8((uint8_t *)dst_ptr, d8u8); dst_ptr += dst_pitch;
    vst1_u8((uint8_t *)dst_ptr, d9u8);
  }
  return;
}

void vp8_bilinear_predict16x16_neon(
    unsigned char *src_ptr,
    int src_pixels_per_line,
    int xoffset,
    int yoffset,
    unsigned char *dst_ptr,
    int dst_pitch) {
  int i;
  unsigned char tmp[272];
  unsigned char *tmpp;
  uint8x8_t d0u8, d1u8, d2u8, d3u8, d4u8, d5u8, d6u8, d7u8, d8u8, d9u8;
  uint8x8_t d10u8, d11u8, d12u8, d13u8, d14u8, d15u8, d16u8, d17u8, d18u8;
  uint8x8_t d19u8, d20u8, d21u8;
  uint8x16_t q1u8, q2u8, q3u8, q4u8, q5u8, q6u8, q7u8, q8u8, q9u8, q10u8;
  uint8x16_t q11u8, q12u8, q13u8, q14u8, q15u8;
  uint16x8_t q1u16, q2u16, q3u16, q4u16, q5u16, q6u16, q7u16, q8u16;
  uint16x8_t q9u16, q10u16, q11u16, q12u16, q13u16, q14u16;

  if (xoffset == 0) {  // secondpass_bfilter16x16_only
    d0u8 = vdup_n_u8(bifilter4_coeff[yoffset][0]);
    d1u8 = vdup_n_u8(bifilter4_coeff[yoffset][1]);

    q11u8 = vld1q_u8(src_ptr);
    src_ptr += src_pixels_per_line;
    for (i = 4; i > 0; i--) {
      q12u8 = vld1q_u8(src_ptr); src_ptr += src_pixels_per_line;
      q13u8 = vld1q_u8(src_ptr); src_ptr += src_pixels_per_line;
      q14u8 = vld1q_u8(src_ptr); src_ptr += src_pixels_per_line;
      q15u8 = vld1q_u8(src_ptr); src_ptr += src_pixels_per_line;

      q1u16 = vmull_u8(vget_low_u8(q11u8), d0u8);
      q2u16 = vmull_u8(vget_high_u8(q11u8), d0u8);
      q3u16 = vmull_u8(vget_low_u8(q12u8), d0u8);
      q4u16 = vmull_u8(vget_high_u8(q12u8), d0u8);
      q5u16 = vmull_u8(vget_low_u8(q13u8), d0u8);
      q6u16 = vmull_u8(vget_high_u8(q13u8), d0u8);
      q7u16 = vmull_u8(vget_low_u8(q14u8), d0u8);
      q8u16 = vmull_u8(vget_high_u8(q14u8), d0u8);

      q1u16 = vmlal_u8(q1u16, vget_low_u8(q12u8), d1u8);
      q2u16 = vmlal_u8(q2u16, vget_high_u8(q12u8), d1u8);
      q3u16 = vmlal_u8(q3u16, vget_low_u8(q13u8), d1u8);
      q4u16 = vmlal_u8(q4u16, vget_high_u8(q13u8), d1u8);
      q5u16 = vmlal_u8(q5u16, vget_low_u8(q14u8), d1u8);
      q6u16 = vmlal_u8(q6u16, vget_high_u8(q14u8), d1u8);
      q7u16 = vmlal_u8(q7u16, vget_low_u8(q15u8), d1u8);
      q8u16 = vmlal_u8(q8u16, vget_high_u8(q15u8), d1u8);

      d2u8 = vqrshrn_n_u16(q1u16, 7);
      d3u8 = vqrshrn_n_u16(q2u16, 7);
      d4u8 = vqrshrn_n_u16(q3u16, 7);
      d5u8 = vqrshrn_n_u16(q4u16, 7);
      d6u8 = vqrshrn_n_u16(q5u16, 7);
      d7u8 = vqrshrn_n_u16(q6u16, 7);
      d8u8 = vqrshrn_n_u16(q7u16, 7);
      d9u8 = vqrshrn_n_u16(q8u16, 7);

      q1u8 = vcombine_u8(d2u8, d3u8);
      q2u8 = vcombine_u8(d4u8, d5u8);
      q3u8 = vcombine_u8(d6u8, d7u8);
      q4u8 = vcombine_u8(d8u8, d9u8);

      q11u8 = q15u8;

      vst1q_u8((uint8_t *)dst_ptr, q1u8); dst_ptr += dst_pitch;
      vst1q_u8((uint8_t *)dst_ptr, q2u8); dst_ptr += dst_pitch;
      vst1q_u8((uint8_t *)dst_ptr, q3u8); dst_ptr += dst_pitch;
      vst1q_u8((uint8_t *)dst_ptr, q4u8); dst_ptr += dst_pitch;
    }
    return;
  }

  if (yoffset == 0) {  // firstpass_bfilter16x16_only
    d0u8 = vdup_n_u8(bifilter4_coeff[xoffset][0]);
    d1u8 = vdup_n_u8(bifilter4_coeff[xoffset][1]);

    for (i = 4; i > 0 ; i--) {
      d2u8 = vld1_u8(src_ptr);
      d3u8 = vld1_u8(src_ptr + 8);
      d4u8 = vld1_u8(src_ptr + 16); src_ptr += src_pixels_per_line;
      d5u8 = vld1_u8(src_ptr);
      d6u8 = vld1_u8(src_ptr + 8);
      d7u8 = vld1_u8(src_ptr + 16); src_ptr += src_pixels_per_line;
      d8u8 = vld1_u8(src_ptr);
      d9u8 = vld1_u8(src_ptr + 8);
      d10u8 = vld1_u8(src_ptr + 16); src_ptr += src_pixels_per_line;
      d11u8 = vld1_u8(src_ptr);
      d12u8 = vld1_u8(src_ptr + 8);
      d13u8 = vld1_u8(src_ptr + 16); src_ptr += src_pixels_per_line;

      q7u16 = vmull_u8(d2u8, d0u8);
      q8u16 = vmull_u8(d3u8, d0u8);
      q9u16 = vmull_u8(d5u8, d0u8);
      q10u16 = vmull_u8(d6u8, d0u8);
      q11u16 = vmull_u8(d8u8, d0u8);
      q12u16 = vmull_u8(d9u8, d0u8);
      q13u16 = vmull_u8(d11u8, d0u8);
      q14u16 = vmull_u8(d12u8, d0u8);

      d2u8 = vext_u8(d2u8, d3u8, 1);
      d5u8 = vext_u8(d5u8, d6u8, 1);
      d8u8 = vext_u8(d8u8, d9u8, 1);
      d11u8 = vext_u8(d11u8, d12u8, 1);

      q7u16 = vmlal_u8(q7u16, d2u8, d1u8);
      q9u16 = vmlal_u8(q9u16, d5u8, d1u8);
      q11u16 = vmlal_u8(q11u16, d8u8, d1u8);
      q13u16 = vmlal_u8(q13u16, d11u8, d1u8);

      d3u8 = vext_u8(d3u8, d4u8, 1);
      d6u8 = vext_u8(d6u8, d7u8, 1);
      d9u8 = vext_u8(d9u8, d10u8, 1);
      d12u8 = vext_u8(d12u8, d13u8, 1);

      q8u16 = vmlal_u8(q8u16, d3u8, d1u8);
      q10u16 = vmlal_u8(q10u16, d6u8, d1u8);
      q12u16 = vmlal_u8(q12u16, d9u8, d1u8);
      q14u16 = vmlal_u8(q14u16, d12u8, d1u8);

      d14u8 = vqrshrn_n_u16(q7u16, 7);
      d15u8 = vqrshrn_n_u16(q8u16, 7);
      d16u8 = vqrshrn_n_u16(q9u16, 7);
      d17u8 = vqrshrn_n_u16(q10u16, 7);
      d18u8 = vqrshrn_n_u16(q11u16, 7);
      d19u8 = vqrshrn_n_u16(q12u16, 7);
      d20u8 = vqrshrn_n_u16(q13u16, 7);
      d21u8 = vqrshrn_n_u16(q14u16, 7);

      q7u8 = vcombine_u8(d14u8, d15u8);
      q8u8 = vcombine_u8(d16u8, d17u8);
      q9u8 = vcombine_u8(d18u8, d19u8);
      q10u8 = vcombine_u8(d20u8, d21u8);

      vst1q_u8((uint8_t *)dst_ptr, q7u8); dst_ptr += dst_pitch;
      vst1q_u8((uint8_t *)dst_ptr, q8u8); dst_ptr += dst_pitch;
      vst1q_u8((uint8_t *)dst_ptr, q9u8); dst_ptr += dst_pitch;
      vst1q_u8((uint8_t *)dst_ptr, q10u8); dst_ptr += dst_pitch;
    }
    return;
  }

  d0u8 = vdup_n_u8(bifilter4_coeff[xoffset][0]);
  d1u8 = vdup_n_u8(bifilter4_coeff[xoffset][1]);

  d2u8 = vld1_u8(src_ptr);
  d3u8 = vld1_u8(src_ptr + 8);
  d4u8 = vld1_u8(src_ptr + 16); src_ptr += src_pixels_per_line;
  d5u8 = vld1_u8(src_ptr);
  d6u8 = vld1_u8(src_ptr + 8);
  d7u8 = vld1_u8(src_ptr + 16); src_ptr += src_pixels_per_line;
  d8u8 = vld1_u8(src_ptr);
  d9u8 = vld1_u8(src_ptr + 8);
  d10u8 = vld1_u8(src_ptr + 16); src_ptr += src_pixels_per_line;
  d11u8 = vld1_u8(src_ptr);
  d12u8 = vld1_u8(src_ptr + 8);
  d13u8 = vld1_u8(src_ptr + 16); src_ptr += src_pixels_per_line;

  // First Pass: output_height lines x output_width columns (17x16)
  tmpp = tmp;
  for (i = 3; i > 0; i--) {
    q7u16 = vmull_u8(d2u8, d0u8);
    q8u16 = vmull_u8(d3u8, d0u8);
    q9u16 = vmull_u8(d5u8, d0u8);
    q10u16 = vmull_u8(d6u8, d0u8);
    q11u16 = vmull_u8(d8u8, d0u8);
    q12u16 = vmull_u8(d9u8, d0u8);
    q13u16 = vmull_u8(d11u8, d0u8);
    q14u16 = vmull_u8(d12u8, d0u8);

    d2u8 = vext_u8(d2u8, d3u8, 1);
    d5u8 = vext_u8(d5u8, d6u8, 1);
    d8u8 = vext_u8(d8u8, d9u8, 1);
    d11u8 = vext_u8(d11u8, d12u8, 1);

    q7u16 = vmlal_u8(q7u16, d2u8, d1u8);
    q9u16 = vmlal_u8(q9u16, d5u8, d1u8);
    q11u16 = vmlal_u8(q11u16, d8u8, d1u8);
    q13u16 = vmlal_u8(q13u16, d11u8, d1u8);

    d3u8 = vext_u8(d3u8, d4u8, 1);
    d6u8 = vext_u8(d6u8, d7u8, 1);
    d9u8 = vext_u8(d9u8, d10u8, 1);
    d12u8 = vext_u8(d12u8, d13u8, 1);

    q8u16 = vmlal_u8(q8u16, d3u8, d1u8);
    q10u16 = vmlal_u8(q10u16, d6u8, d1u8);
    q12u16 = vmlal_u8(q12u16, d9u8, d1u8);
    q14u16 = vmlal_u8(q14u16, d12u8, d1u8);

    d14u8 = vqrshrn_n_u16(q7u16, 7);
    d15u8 = vqrshrn_n_u16(q8u16, 7);
    d16u8 = vqrshrn_n_u16(q9u16, 7);
    d17u8 = vqrshrn_n_u16(q10u16, 7);
    d18u8 = vqrshrn_n_u16(q11u16, 7);
    d19u8 = vqrshrn_n_u16(q12u16, 7);
    d20u8 = vqrshrn_n_u16(q13u16, 7);
    d21u8 = vqrshrn_n_u16(q14u16, 7);

    d2u8 = vld1_u8(src_ptr);
    d3u8 = vld1_u8(src_ptr + 8);
    d4u8 = vld1_u8(src_ptr + 16); src_ptr += src_pixels_per_line;
    d5u8 = vld1_u8(src_ptr);
    d6u8 = vld1_u8(src_ptr + 8);
    d7u8 = vld1_u8(src_ptr + 16); src_ptr += src_pixels_per_line;
    d8u8 = vld1_u8(src_ptr);
    d9u8 = vld1_u8(src_ptr + 8);
    d10u8 = vld1_u8(src_ptr + 16); src_ptr += src_pixels_per_line;
    d11u8 = vld1_u8(src_ptr);
    d12u8 = vld1_u8(src_ptr + 8);
    d13u8 = vld1_u8(src_ptr + 16); src_ptr += src_pixels_per_line;

    q7u8 = vcombine_u8(d14u8, d15u8);
    q8u8 = vcombine_u8(d16u8, d17u8);
    q9u8 = vcombine_u8(d18u8, d19u8);
    q10u8 = vcombine_u8(d20u8, d21u8);

    vst1q_u8((uint8_t *)tmpp, q7u8); tmpp += 16;
    vst1q_u8((uint8_t *)tmpp, q8u8); tmpp += 16;
    vst1q_u8((uint8_t *)tmpp, q9u8); tmpp += 16;
    vst1q_u8((uint8_t *)tmpp, q10u8); tmpp += 16;
  }

  // First-pass filtering for rest 5 lines
  d14u8 = vld1_u8(src_ptr);
  d15u8 = vld1_u8(src_ptr + 8);
  d16u8 = vld1_u8(src_ptr + 16); src_ptr += src_pixels_per_line;

  q9u16 = vmull_u8(d2u8, d0u8);
  q10u16 = vmull_u8(d3u8, d0u8);
  q11u16 = vmull_u8(d5u8, d0u8);
  q12u16 = vmull_u8(d6u8, d0u8);
  q13u16 = vmull_u8(d8u8, d0u8);
  q14u16 = vmull_u8(d9u8, d0u8);

  d2u8 = vext_u8(d2u8, d3u8, 1);
  d5u8 = vext_u8(d5u8, d6u8, 1);
  d8u8 = vext_u8(d8u8, d9u8, 1);

  q9u16 = vmlal_u8(q9u16, d2u8, d1u8);
  q11u16 = vmlal_u8(q11u16, d5u8, d1u8);
  q13u16 = vmlal_u8(q13u16, d8u8, d1u8);

  d3u8 = vext_u8(d3u8, d4u8, 1);
  d6u8 = vext_u8(d6u8, d7u8, 1);
  d9u8 = vext_u8(d9u8, d10u8, 1);

  q10u16 = vmlal_u8(q10u16, d3u8, d1u8);
  q12u16 = vmlal_u8(q12u16, d6u8, d1u8);
  q14u16 = vmlal_u8(q14u16, d9u8, d1u8);

  q1u16 = vmull_u8(d11u8, d0u8);
  q2u16 = vmull_u8(d12u8, d0u8);
  q3u16 = vmull_u8(d14u8, d0u8);
  q4u16 = vmull_u8(d15u8, d0u8);

  d11u8 = vext_u8(d11u8, d12u8, 1);
  d14u8 = vext_u8(d14u8, d15u8, 1);

  q1u16 = vmlal_u8(q1u16, d11u8, d1u8);
  q3u16 = vmlal_u8(q3u16, d14u8, d1u8);

  d12u8 = vext_u8(d12u8, d13u8, 1);
  d15u8 = vext_u8(d15u8, d16u8, 1);

  q2u16 = vmlal_u8(q2u16, d12u8, d1u8);
  q4u16 = vmlal_u8(q4u16, d15u8, d1u8);

  d10u8 = vqrshrn_n_u16(q9u16, 7);
  d11u8 = vqrshrn_n_u16(q10u16, 7);
  d12u8 = vqrshrn_n_u16(q11u16, 7);
  d13u8 = vqrshrn_n_u16(q12u16, 7);
  d14u8 = vqrshrn_n_u16(q13u16, 7);
  d15u8 = vqrshrn_n_u16(q14u16, 7);
  d16u8 = vqrshrn_n_u16(q1u16, 7);
  d17u8 = vqrshrn_n_u16(q2u16, 7);
  d18u8 = vqrshrn_n_u16(q3u16, 7);
  d19u8 = vqrshrn_n_u16(q4u16, 7);

  q5u8 = vcombine_u8(d10u8, d11u8);
  q6u8 = vcombine_u8(d12u8, d13u8);
  q7u8 = vcombine_u8(d14u8, d15u8);
  q8u8 = vcombine_u8(d16u8, d17u8);
  q9u8 = vcombine_u8(d18u8, d19u8);

  vst1q_u8((uint8_t *)tmpp, q5u8); tmpp += 16;
  vst1q_u8((uint8_t *)tmpp, q6u8); tmpp += 16;
  vst1q_u8((uint8_t *)tmpp, q7u8); tmpp += 16;
  vst1q_u8((uint8_t *)tmpp, q8u8); tmpp += 16;
  vst1q_u8((uint8_t *)tmpp, q9u8);

  // secondpass_filter
  d0u8 = vdup_n_u8(bifilter4_coeff[yoffset][0]);
  d1u8 = vdup_n_u8(bifilter4_coeff[yoffset][1]);

  tmpp = tmp;
  q11u8 = vld1q_u8(tmpp);
  tmpp += 16;
  for (i = 4; i > 0; i--) {
    q12u8 = vld1q_u8(tmpp); tmpp += 16;
    q13u8 = vld1q_u8(tmpp); tmpp += 16;
    q14u8 = vld1q_u8(tmpp); tmpp += 16;
    q15u8 = vld1q_u8(tmpp); tmpp += 16;

    q1u16 = vmull_u8(vget_low_u8(q11u8), d0u8);
    q2u16 = vmull_u8(vget_high_u8(q11u8), d0u8);
    q3u16 = vmull_u8(vget_low_u8(q12u8), d0u8);
    q4u16 = vmull_u8(vget_high_u8(q12u8), d0u8);
    q5u16 = vmull_u8(vget_low_u8(q13u8), d0u8);
    q6u16 = vmull_u8(vget_high_u8(q13u8), d0u8);
    q7u16 = vmull_u8(vget_low_u8(q14u8), d0u8);
    q8u16 = vmull_u8(vget_high_u8(q14u8), d0u8);

    q1u16 = vmlal_u8(q1u16, vget_low_u8(q12u8), d1u8);
    q2u16 = vmlal_u8(q2u16, vget_high_u8(q12u8), d1u8);
    q3u16 = vmlal_u8(q3u16, vget_low_u8(q13u8), d1u8);
    q4u16 = vmlal_u8(q4u16, vget_high_u8(q13u8), d1u8);
    q5u16 = vmlal_u8(q5u16, vget_low_u8(q14u8), d1u8);
    q6u16 = vmlal_u8(q6u16, vget_high_u8(q14u8), d1u8);
    q7u16 = vmlal_u8(q7u16, vget_low_u8(q15u8), d1u8);
    q8u16 = vmlal_u8(q8u16, vget_high_u8(q15u8), d1u8);

    d2u8 = vqrshrn_n_u16(q1u16, 7);
    d3u8 = vqrshrn_n_u16(q2u16, 7);
    d4u8 = vqrshrn_n_u16(q3u16, 7);
    d5u8 = vqrshrn_n_u16(q4u16, 7);
    d6u8 = vqrshrn_n_u16(q5u16, 7);
    d7u8 = vqrshrn_n_u16(q6u16, 7);
    d8u8 = vqrshrn_n_u16(q7u16, 7);
    d9u8 = vqrshrn_n_u16(q8u16, 7);

    q1u8 = vcombine_u8(d2u8, d3u8);
    q2u8 = vcombine_u8(d4u8, d5u8);
    q3u8 = vcombine_u8(d6u8, d7u8);
    q4u8 = vcombine_u8(d8u8, d9u8);

    q11u8 = q15u8;

    vst1q_u8((uint8_t *)dst_ptr, q1u8); dst_ptr += dst_pitch;
    vst1q_u8((uint8_t *)dst_ptr, q2u8); dst_ptr += dst_pitch;
    vst1q_u8((uint8_t *)dst_ptr, q3u8); dst_ptr += dst_pitch;
    vst1q_u8((uint8_t *)dst_ptr, q4u8); dst_ptr += dst_pitch;
  }
  return;
}
@@ -1,59 +0,0 @@
/*
 * Copyright (c) 2014 The WebM project authors. All Rights Reserved.
 *
 * Use of this source code is governed by a BSD-style license
 * that can be found in the LICENSE file in the root of the source
 * tree. An additional intellectual property rights grant can be found
 * in the file PATENTS. All contributing project authors may
 * be found in the AUTHORS file in the root of the source tree.
 */

#include <arm_neon.h>

void vp8_copy_mem8x4_neon(
    unsigned char *src,
    int src_stride,
    unsigned char *dst,
    int dst_stride) {
  uint8x8_t vtmp;
  int r;

  for (r = 0; r < 4; r++) {
    vtmp = vld1_u8(src);
    vst1_u8(dst, vtmp);
    src += src_stride;
    dst += dst_stride;
  }
}

void vp8_copy_mem8x8_neon(
    unsigned char *src,
    int src_stride,
    unsigned char *dst,
    int dst_stride) {
  uint8x8_t vtmp;
  int r;

  for (r = 0; r < 8; r++) {
    vtmp = vld1_u8(src);
    vst1_u8(dst, vtmp);
    src += src_stride;
    dst += dst_stride;
  }
}

void vp8_copy_mem16x16_neon(
    unsigned char *src,
    int src_stride,
    unsigned char *dst,
    int dst_stride) {
  int r;
  uint8x16_t qtmp;

  for (r = 0; r < 16; r++) {
    qtmp = vld1q_u8(src);
    vst1q_u8(dst, qtmp);
    src += src_stride;
    dst += dst_stride;
  }
}
@@ -1,42 +0,0 @@
/*
 * Copyright (c) 2014 The WebM project authors. All Rights Reserved.
 *
 * Use of this source code is governed by a BSD-style license
 * that can be found in the LICENSE file in the root of the source
 * tree. An additional intellectual property rights grant can be found
 * in the file PATENTS. All contributing project authors may
 * be found in the AUTHORS file in the root of the source tree.
 */

#include <arm_neon.h>

void vp8_dc_only_idct_add_neon(
    int16_t input_dc,
    unsigned char *pred_ptr,
    int pred_stride,
    unsigned char *dst_ptr,
    int dst_stride) {
  int i;
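  /* Added note: with only a DC coefficient the inverse transform reduces to
   * adding (input_dc + 4) >> 3 (the DC term with the final rounding shift
   * already applied) to every predictor pixel. */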
  uint16_t a1 = ((input_dc + 4) >> 3);
  uint32x2_t d2u32 = vdup_n_u32(0);
  uint8x8_t d2u8;
  uint16x8_t q1u16;
  uint16x8_t qAdd;

  qAdd = vdupq_n_u16(a1);

  for (i = 0; i < 2; i++) {
    d2u32 = vld1_lane_u32((const uint32_t *)pred_ptr, d2u32, 0);
    pred_ptr += pred_stride;
    d2u32 = vld1_lane_u32((const uint32_t *)pred_ptr, d2u32, 1);
    pred_ptr += pred_stride;

    q1u16 = vaddw_u8(qAdd, vreinterpret_u8_u32(d2u32));
    d2u8 = vqmovun_s16(vreinterpretq_s16_u16(q1u16));

    vst1_lane_u32((uint32_t *)dst_ptr, vreinterpret_u32_u8(d2u8), 0);
    dst_ptr += dst_stride;
    vst1_lane_u32((uint32_t *)dst_ptr, vreinterpret_u32_u8(d2u8), 1);
    dst_ptr += dst_stride;
  }
}
@@ -1,142 +0,0 @@
/*
 * Copyright (c) 2014 The WebM project authors. All Rights Reserved.
 *
 * Use of this source code is governed by a BSD-style license
 * that can be found in the LICENSE file in the root of the source
 * tree. An additional intellectual property rights grant can be found
 * in the file PATENTS. All contributing project authors may
 * be found in the AUTHORS file in the root of the source tree.
 */

#include <arm_neon.h>

static const int16_t cospi8sqrt2minus1 = 20091;
static const int16_t sinpi8sqrt2 = 35468;

void vp8_dequant_idct_add_neon(
    int16_t *input,
    int16_t *dq,
    unsigned char *dst,
    int stride) {
  unsigned char *dst0;
  int32x2_t d14, d15;
  int16x4_t d2, d3, d4, d5, d10, d11, d12, d13;
  int16x8_t q1, q2, q3, q4, q5, q6;
  int16x8_t qEmpty = vdupq_n_s16(0);
  int32x2x2_t d2tmp0, d2tmp1;
  int16x4x2_t d2tmp2, d2tmp3;

  d14 = d15 = vdup_n_s32(0);

  // load input
  q3 = vld1q_s16(input);
  vst1q_s16(input, qEmpty);
  input += 8;
  q4 = vld1q_s16(input);
  vst1q_s16(input, qEmpty);

  // load dq
  q5 = vld1q_s16(dq);
  dq += 8;
  q6 = vld1q_s16(dq);

  // load src from dst
  dst0 = dst;
  d14 = vld1_lane_s32((const int32_t *)dst0, d14, 0);
  dst0 += stride;
  d14 = vld1_lane_s32((const int32_t *)dst0, d14, 1);
  dst0 += stride;
  d15 = vld1_lane_s32((const int32_t *)dst0, d15, 0);
  dst0 += stride;
  d15 = vld1_lane_s32((const int32_t *)dst0, d15, 1);

  q1 = vreinterpretq_s16_u16(vmulq_u16(vreinterpretq_u16_s16(q3),
                                       vreinterpretq_u16_s16(q5)));
  q2 = vreinterpretq_s16_u16(vmulq_u16(vreinterpretq_u16_s16(q4),
                                       vreinterpretq_u16_s16(q6)));

  d12 = vqadd_s16(vget_low_s16(q1), vget_low_s16(q2));
  d13 = vqsub_s16(vget_low_s16(q1), vget_low_s16(q2));

  q2 = vcombine_s16(vget_high_s16(q1), vget_high_s16(q2));

  q3 = vqdmulhq_n_s16(q2, sinpi8sqrt2);
  q4 = vqdmulhq_n_s16(q2, cospi8sqrt2minus1);

  q3 = vshrq_n_s16(q3, 1);
  q4 = vshrq_n_s16(q4, 1);

  q3 = vqaddq_s16(q3, q2);
  q4 = vqaddq_s16(q4, q2);

  d10 = vqsub_s16(vget_low_s16(q3), vget_high_s16(q4));
  d11 = vqadd_s16(vget_high_s16(q3), vget_low_s16(q4));

  d2 = vqadd_s16(d12, d11);
  d3 = vqadd_s16(d13, d10);
  d4 = vqsub_s16(d13, d10);
  d5 = vqsub_s16(d12, d11);

  d2tmp0 = vtrn_s32(vreinterpret_s32_s16(d2), vreinterpret_s32_s16(d4));
  d2tmp1 = vtrn_s32(vreinterpret_s32_s16(d3), vreinterpret_s32_s16(d5));
  d2tmp2 = vtrn_s16(vreinterpret_s16_s32(d2tmp0.val[0]),
                    vreinterpret_s16_s32(d2tmp1.val[0]));
  d2tmp3 = vtrn_s16(vreinterpret_s16_s32(d2tmp0.val[1]),
                    vreinterpret_s16_s32(d2tmp1.val[1]));

  // loop 2
  q2 = vcombine_s16(d2tmp2.val[1], d2tmp3.val[1]);

  q3 = vqdmulhq_n_s16(q2, sinpi8sqrt2);
  q4 = vqdmulhq_n_s16(q2, cospi8sqrt2minus1);

  d12 = vqadd_s16(d2tmp2.val[0], d2tmp3.val[0]);
  d13 = vqsub_s16(d2tmp2.val[0], d2tmp3.val[0]);

  q3 = vshrq_n_s16(q3, 1);
  q4 = vshrq_n_s16(q4, 1);

  q3 = vqaddq_s16(q3, q2);
  q4 = vqaddq_s16(q4, q2);

  d10 = vqsub_s16(vget_low_s16(q3), vget_high_s16(q4));
  d11 = vqadd_s16(vget_high_s16(q3), vget_low_s16(q4));

  d2 = vqadd_s16(d12, d11);
  d3 = vqadd_s16(d13, d10);
  d4 = vqsub_s16(d13, d10);
  d5 = vqsub_s16(d12, d11);

  d2 = vrshr_n_s16(d2, 3);
  d3 = vrshr_n_s16(d3, 3);
  d4 = vrshr_n_s16(d4, 3);
  d5 = vrshr_n_s16(d5, 3);

  d2tmp0 = vtrn_s32(vreinterpret_s32_s16(d2), vreinterpret_s32_s16(d4));
  d2tmp1 = vtrn_s32(vreinterpret_s32_s16(d3), vreinterpret_s32_s16(d5));
  d2tmp2 = vtrn_s16(vreinterpret_s16_s32(d2tmp0.val[0]),
                    vreinterpret_s16_s32(d2tmp1.val[0]));
  d2tmp3 = vtrn_s16(vreinterpret_s16_s32(d2tmp0.val[1]),
                    vreinterpret_s16_s32(d2tmp1.val[1]));

  q1 = vcombine_s16(d2tmp2.val[0], d2tmp2.val[1]);
  q2 = vcombine_s16(d2tmp3.val[0], d2tmp3.val[1]);

  q1 = vreinterpretq_s16_u16(vaddw_u8(vreinterpretq_u16_s16(q1),
                                      vreinterpret_u8_s32(d14)));
  q2 = vreinterpretq_s16_u16(vaddw_u8(vreinterpretq_u16_s16(q2),
                                      vreinterpret_u8_s32(d15)));

  d14 = vreinterpret_s32_u8(vqmovun_s16(q1));
  d15 = vreinterpret_s32_u8(vqmovun_s16(q2));

  dst0 = dst;
  vst1_lane_s32((int32_t *)dst0, d14, 0);
  dst0 += stride;
  vst1_lane_s32((int32_t *)dst0, d14, 1);
  dst0 += stride;
  vst1_lane_s32((int32_t *)dst0, d15, 0);
  dst0 += stride;
  vst1_lane_s32((int32_t *)dst0, d15, 1);
  return;
}
@@ -1,25 +0,0 @@
/*
 * Copyright (c) 2014 The WebM project authors. All Rights Reserved.
 *
 * Use of this source code is governed by a BSD-style license
 * that can be found in the LICENSE file in the root of the source
 * tree. An additional intellectual property rights grant can be found
 * in the file PATENTS. All contributing project authors may
 * be found in the AUTHORS file in the root of the source tree.
 */

#include <arm_neon.h>

#include "vp8/common/blockd.h"

void vp8_dequantize_b_neon(BLOCKD *d, short *DQC) {
  int16x8x2_t qQ, qDQC, qDQ;

  qQ = vld2q_s16(d->qcoeff);
  qDQC = vld2q_s16(DQC);

  qDQ.val[0] = vmulq_s16(qQ.val[0], qDQC.val[0]);
  qDQ.val[1] = vmulq_s16(qQ.val[1], qDQC.val[1]);

  vst2q_s16(d->dqcoeff, qDQ);
}
@@ -1,96 +0,0 @@
/*
 * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
 *
 * Use of this source code is governed by a BSD-style license
 * that can be found in the LICENSE file in the root of the source
 * tree. An additional intellectual property rights grant can be found
 * in the file PATENTS. All contributing project authors may
 * be found in the AUTHORS file in the root of the source tree.
 */

#include "vpx_config.h"
#include "vp8_rtcd.h"

/* place these declarations here because we don't want to maintain them
 * outside of this scope
 */
void idct_dequant_full_2x_neon(short *q, short *dq,
                               unsigned char *dst, int stride);
void idct_dequant_0_2x_neon(short *q, short dq,
                            unsigned char *dst, int stride);


void vp8_dequant_idct_add_y_block_neon(short *q, short *dq,
                                       unsigned char *dst,
                                       int stride, char *eobs)
{
    int i;

    for (i = 0; i < 4; i++)
    {
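        /* Added note: eobs stores one end-of-block count per 4x4 block;
         * reading two adjacent counts as a short and masking with 0xfefe
         * tests whether either block has coefficients beyond the DC term,
         * which selects the full IDCT path over the DC-only path. */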
        if (((short *)(eobs))[0])
        {
            if (((short *)eobs)[0] & 0xfefe)
                idct_dequant_full_2x_neon (q, dq, dst, stride);
            else
                idct_dequant_0_2x_neon (q, dq[0], dst, stride);
        }

        if (((short *)(eobs))[1])
        {
            if (((short *)eobs)[1] & 0xfefe)
                idct_dequant_full_2x_neon (q+32, dq, dst+8, stride);
            else
                idct_dequant_0_2x_neon (q+32, dq[0], dst+8, stride);
        }
        q += 64;
        dst += 4*stride;
        eobs += 4;
    }
}

void vp8_dequant_idct_add_uv_block_neon(short *q, short *dq,
                                        unsigned char *dstu,
                                        unsigned char *dstv,
                                        int stride, char *eobs)
{
    if (((short *)(eobs))[0])
    {
        if (((short *)eobs)[0] & 0xfefe)
            idct_dequant_full_2x_neon (q, dq, dstu, stride);
        else
            idct_dequant_0_2x_neon (q, dq[0], dstu, stride);
    }

    q += 32;
    dstu += 4*stride;

    if (((short *)(eobs))[1])
    {
        if (((short *)eobs)[1] & 0xfefe)
            idct_dequant_full_2x_neon (q, dq, dstu, stride);
        else
            idct_dequant_0_2x_neon (q, dq[0], dstu, stride);
    }

    q += 32;

    if (((short *)(eobs))[2])
    {
        if (((short *)eobs)[2] & 0xfefe)
            idct_dequant_full_2x_neon (q, dq, dstv, stride);
        else
            idct_dequant_0_2x_neon (q, dq[0], dstv, stride);
    }

    q += 32;
    dstv += 4*stride;

    if (((short *)(eobs))[3])
    {
        if (((short *)eobs)[3] & 0xfefe)
            idct_dequant_full_2x_neon (q, dq, dstv, stride);
        else
            idct_dequant_0_2x_neon (q, dq[0], dstv, stride);
    }
}
@@ -1,63 +0,0 @@
/*
 * Copyright (c) 2014 The WebM project authors. All Rights Reserved.
 *
 * Use of this source code is governed by a BSD-style license
 * that can be found in the LICENSE file in the root of the source
 * tree. An additional intellectual property rights grant can be found
 * in the file PATENTS. All contributing project authors may
 * be found in the AUTHORS file in the root of the source tree.
 */

#include <arm_neon.h>

void idct_dequant_0_2x_neon(
    int16_t *q,
    int16_t dq,
    unsigned char *dst,
    int stride) {
  unsigned char *dst0;
  int i, a0, a1;
  int16x8x2_t q2Add;
  int32x2_t d2s32 = vdup_n_s32(0),
            d4s32 = vdup_n_s32(0);
  uint8x8_t d2u8, d4u8;
  uint16x8_t q1u16, q2u16;

  a0 = ((q[0] * dq) + 4) >> 3;
  a1 = ((q[16] * dq) + 4) >> 3;
  q[0] = q[16] = 0;
  q2Add.val[0] = vdupq_n_s16((int16_t)a0);
  q2Add.val[1] = vdupq_n_s16((int16_t)a1);

  for (i = 0; i < 2; i++, dst += 4) {
    dst0 = dst;
    d2s32 = vld1_lane_s32((const int32_t *)dst0, d2s32, 0);
    dst0 += stride;
    d2s32 = vld1_lane_s32((const int32_t *)dst0, d2s32, 1);
    dst0 += stride;
    d4s32 = vld1_lane_s32((const int32_t *)dst0, d4s32, 0);
    dst0 += stride;
    d4s32 = vld1_lane_s32((const int32_t *)dst0, d4s32, 1);

    q1u16 = vaddw_u8(vreinterpretq_u16_s16(q2Add.val[i]),
                     vreinterpret_u8_s32(d2s32));
    q2u16 = vaddw_u8(vreinterpretq_u16_s16(q2Add.val[i]),
                     vreinterpret_u8_s32(d4s32));

    d2u8 = vqmovun_s16(vreinterpretq_s16_u16(q1u16));
    d4u8 = vqmovun_s16(vreinterpretq_s16_u16(q2u16));

    d2s32 = vreinterpret_s32_u8(d2u8);
    d4s32 = vreinterpret_s32_u8(d4u8);

    dst0 = dst;
    vst1_lane_s32((int32_t *)dst0, d2s32, 0);
    dst0 += stride;
    vst1_lane_s32((int32_t *)dst0, d2s32, 1);
    dst0 += stride;
    vst1_lane_s32((int32_t *)dst0, d4s32, 0);
    dst0 += stride;
    vst1_lane_s32((int32_t *)dst0, d4s32, 1);
  }
  return;
}
@@ -1,185 +0,0 @@
/*
 * Copyright (c) 2014 The WebM project authors. All Rights Reserved.
 *
 * Use of this source code is governed by a BSD-style license
 * that can be found in the LICENSE file in the root of the source
 * tree. An additional intellectual property rights grant can be found
 * in the file PATENTS. All contributing project authors may
 * be found in the AUTHORS file in the root of the source tree.
 */

#include <arm_neon.h>

static const int16_t cospi8sqrt2minus1 = 20091;
static const int16_t sinpi8sqrt2 = 17734;
// because the lowest bit in 0x8a8c is 0, we can pre-shift this
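// (Added note: 35468 = 0x8a8c is the Q16 value of sqrt(2)*sin(pi/8); since
// its low bit is 0, halving it to 17734 loses no precision and keeps the
// multiplier within the int16_t range that vqdmulhq_n_s16 expects.)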

void idct_dequant_full_2x_neon(
    int16_t *q,
    int16_t *dq,
    unsigned char *dst,
    int stride) {
  unsigned char *dst0, *dst1;
  int32x2_t d28, d29, d30, d31;
  int16x8_t q0, q1, q2, q3, q4, q5, q6, q7, q8, q9, q10, q11;
  int16x8_t qEmpty = vdupq_n_s16(0);
  int32x4x2_t q2tmp0, q2tmp1;
  int16x8x2_t q2tmp2, q2tmp3;
  int16x4_t dLow0, dLow1, dHigh0, dHigh1;

  d28 = d29 = d30 = d31 = vdup_n_s32(0);

  // load dq
  q0 = vld1q_s16(dq);
  dq += 8;
  q1 = vld1q_s16(dq);

  // load q
  q2 = vld1q_s16(q);
  vst1q_s16(q, qEmpty);
  q += 8;
  q3 = vld1q_s16(q);
  vst1q_s16(q, qEmpty);
  q += 8;
  q4 = vld1q_s16(q);
  vst1q_s16(q, qEmpty);
  q += 8;
  q5 = vld1q_s16(q);
  vst1q_s16(q, qEmpty);

  // load src from dst
  dst0 = dst;
  dst1 = dst + 4;
  d28 = vld1_lane_s32((const int32_t *)dst0, d28, 0);
  dst0 += stride;
  d28 = vld1_lane_s32((const int32_t *)dst1, d28, 1);
  dst1 += stride;
  d29 = vld1_lane_s32((const int32_t *)dst0, d29, 0);
  dst0 += stride;
  d29 = vld1_lane_s32((const int32_t *)dst1, d29, 1);
  dst1 += stride;

  d30 = vld1_lane_s32((const int32_t *)dst0, d30, 0);
  dst0 += stride;
  d30 = vld1_lane_s32((const int32_t *)dst1, d30, 1);
  dst1 += stride;
  d31 = vld1_lane_s32((const int32_t *)dst0, d31, 0);
  d31 = vld1_lane_s32((const int32_t *)dst1, d31, 1);

  q2 = vmulq_s16(q2, q0);
  q3 = vmulq_s16(q3, q1);
  q4 = vmulq_s16(q4, q0);
  q5 = vmulq_s16(q5, q1);

  // vswp
  dLow0 = vget_low_s16(q2);
  dHigh0 = vget_high_s16(q2);
  dLow1 = vget_low_s16(q4);
  dHigh1 = vget_high_s16(q4);
  q2 = vcombine_s16(dLow0, dLow1);
  q4 = vcombine_s16(dHigh0, dHigh1);

  dLow0 = vget_low_s16(q3);
  dHigh0 = vget_high_s16(q3);
  dLow1 = vget_low_s16(q5);
  dHigh1 = vget_high_s16(q5);
  q3 = vcombine_s16(dLow0, dLow1);
  q5 = vcombine_s16(dHigh0, dHigh1);

  q6 = vqdmulhq_n_s16(q4, sinpi8sqrt2);
  q7 = vqdmulhq_n_s16(q5, sinpi8sqrt2);
  q8 = vqdmulhq_n_s16(q4, cospi8sqrt2minus1);
  q9 = vqdmulhq_n_s16(q5, cospi8sqrt2minus1);

  q10 = vqaddq_s16(q2, q3);
  q11 = vqsubq_s16(q2, q3);

  q8 = vshrq_n_s16(q8, 1);
  q9 = vshrq_n_s16(q9, 1);

  q4 = vqaddq_s16(q4, q8);
  q5 = vqaddq_s16(q5, q9);

  q2 = vqsubq_s16(q6, q5);
  q3 = vqaddq_s16(q7, q4);

  q4 = vqaddq_s16(q10, q3);
  q5 = vqaddq_s16(q11, q2);
  q6 = vqsubq_s16(q11, q2);
  q7 = vqsubq_s16(q10, q3);

  q2tmp0 = vtrnq_s32(vreinterpretq_s32_s16(q4), vreinterpretq_s32_s16(q6));
  q2tmp1 = vtrnq_s32(vreinterpretq_s32_s16(q5), vreinterpretq_s32_s16(q7));
  q2tmp2 = vtrnq_s16(vreinterpretq_s16_s32(q2tmp0.val[0]),
                     vreinterpretq_s16_s32(q2tmp1.val[0]));
  q2tmp3 = vtrnq_s16(vreinterpretq_s16_s32(q2tmp0.val[1]),
                     vreinterpretq_s16_s32(q2tmp1.val[1]));

  // loop 2
  q8 = vqdmulhq_n_s16(q2tmp2.val[1], sinpi8sqrt2);
  q9 = vqdmulhq_n_s16(q2tmp3.val[1], sinpi8sqrt2);
  q10 = vqdmulhq_n_s16(q2tmp2.val[1], cospi8sqrt2minus1);
  q11 = vqdmulhq_n_s16(q2tmp3.val[1], cospi8sqrt2minus1);

  q2 = vqaddq_s16(q2tmp2.val[0], q2tmp3.val[0]);
  q3 = vqsubq_s16(q2tmp2.val[0], q2tmp3.val[0]);

  q10 = vshrq_n_s16(q10, 1);
  q11 = vshrq_n_s16(q11, 1);

  q10 = vqaddq_s16(q2tmp2.val[1], q10);
  q11 = vqaddq_s16(q2tmp3.val[1], q11);

  q8 = vqsubq_s16(q8, q11);
  q9 = vqaddq_s16(q9, q10);

  q4 = vqaddq_s16(q2, q9);
  q5 = vqaddq_s16(q3, q8);
  q6 = vqsubq_s16(q3, q8);
  q7 = vqsubq_s16(q2, q9);

  q4 = vrshrq_n_s16(q4, 3);
  q5 = vrshrq_n_s16(q5, 3);
  q6 = vrshrq_n_s16(q6, 3);
  q7 = vrshrq_n_s16(q7, 3);

  q2tmp0 = vtrnq_s32(vreinterpretq_s32_s16(q4), vreinterpretq_s32_s16(q6));
  q2tmp1 = vtrnq_s32(vreinterpretq_s32_s16(q5), vreinterpretq_s32_s16(q7));
  q2tmp2 = vtrnq_s16(vreinterpretq_s16_s32(q2tmp0.val[0]),
                     vreinterpretq_s16_s32(q2tmp1.val[0]));
  q2tmp3 = vtrnq_s16(vreinterpretq_s16_s32(q2tmp0.val[1]),
                     vreinterpretq_s16_s32(q2tmp1.val[1]));

  q4 = vreinterpretq_s16_u16(vaddw_u8(vreinterpretq_u16_s16(q2tmp2.val[0]),
                                      vreinterpret_u8_s32(d28)));
  q5 = vreinterpretq_s16_u16(vaddw_u8(vreinterpretq_u16_s16(q2tmp2.val[1]),
                                      vreinterpret_u8_s32(d29)));
  q6 = vreinterpretq_s16_u16(vaddw_u8(vreinterpretq_u16_s16(q2tmp3.val[0]),
                                      vreinterpret_u8_s32(d30)));
  q7 = vreinterpretq_s16_u16(vaddw_u8(vreinterpretq_u16_s16(q2tmp3.val[1]),
                                      vreinterpret_u8_s32(d31)));

  d28 = vreinterpret_s32_u8(vqmovun_s16(q4));
  d29 = vreinterpret_s32_u8(vqmovun_s16(q5));
  d30 = vreinterpret_s32_u8(vqmovun_s16(q6));
  d31 = vreinterpret_s32_u8(vqmovun_s16(q7));

  dst0 = dst;
  dst1 = dst + 4;
  vst1_lane_s32((int32_t *)dst0, d28, 0);
  dst0 += stride;
  vst1_lane_s32((int32_t *)dst1, d28, 1);
  dst1 += stride;
  vst1_lane_s32((int32_t *)dst0, d29, 0);
  dst0 += stride;
  vst1_lane_s32((int32_t *)dst1, d29, 1);
  dst1 += stride;

  vst1_lane_s32((int32_t *)dst0, d30, 0);
  dst0 += stride;
  vst1_lane_s32((int32_t *)dst1, d30, 1);
  dst1 += stride;
  vst1_lane_s32((int32_t *)dst0, d31, 0);
  vst1_lane_s32((int32_t *)dst1, d31, 1);
  return;
}
@@ -1,102 +0,0 @@
/*
 * Copyright (c) 2014 The WebM project authors. All Rights Reserved.
 *
 * Use of this source code is governed by a BSD-style license
 * that can be found in the LICENSE file in the root of the source
 * tree. An additional intellectual property rights grant can be found
 * in the file PATENTS. All contributing project authors may
 * be found in the AUTHORS file in the root of the source tree.
 */

#include <arm_neon.h>

void vp8_short_inv_walsh4x4_neon(
    int16_t *input,
    int16_t *mb_dqcoeff) {
  int16x8_t q0s16, q1s16, q2s16, q3s16;
  int16x4_t d4s16, d5s16, d6s16, d7s16;
  int16x4x2_t v2tmp0, v2tmp1;
  int32x2x2_t v2tmp2, v2tmp3;
  int16x8_t qAdd3;

  q0s16 = vld1q_s16(input);
  q1s16 = vld1q_s16(input + 8);

  // 1st for loop
  d4s16 = vadd_s16(vget_low_s16(q0s16), vget_high_s16(q1s16));
  d6s16 = vadd_s16(vget_high_s16(q0s16), vget_low_s16(q1s16));
  d5s16 = vsub_s16(vget_low_s16(q0s16), vget_high_s16(q1s16));
  d7s16 = vsub_s16(vget_high_s16(q0s16), vget_low_s16(q1s16));

  q2s16 = vcombine_s16(d4s16, d5s16);
  q3s16 = vcombine_s16(d6s16, d7s16);

  q0s16 = vaddq_s16(q2s16, q3s16);
  q1s16 = vsubq_s16(q2s16, q3s16);

  v2tmp2 = vtrn_s32(vreinterpret_s32_s16(vget_low_s16(q0s16)),
                    vreinterpret_s32_s16(vget_low_s16(q1s16)));
  v2tmp3 = vtrn_s32(vreinterpret_s32_s16(vget_high_s16(q0s16)),
                    vreinterpret_s32_s16(vget_high_s16(q1s16)));
  v2tmp0 = vtrn_s16(vreinterpret_s16_s32(v2tmp2.val[0]),
                    vreinterpret_s16_s32(v2tmp3.val[0]));
  v2tmp1 = vtrn_s16(vreinterpret_s16_s32(v2tmp2.val[1]),
                    vreinterpret_s16_s32(v2tmp3.val[1]));

  // 2nd for loop
  d4s16 = vadd_s16(v2tmp0.val[0], v2tmp1.val[1]);
  d6s16 = vadd_s16(v2tmp0.val[1], v2tmp1.val[0]);
  d5s16 = vsub_s16(v2tmp0.val[0], v2tmp1.val[1]);
  d7s16 = vsub_s16(v2tmp0.val[1], v2tmp1.val[0]);
  q2s16 = vcombine_s16(d4s16, d5s16);
  q3s16 = vcombine_s16(d6s16, d7s16);

  qAdd3 = vdupq_n_s16(3);

  q0s16 = vaddq_s16(q2s16, q3s16);
  q1s16 = vsubq_s16(q2s16, q3s16);

  q0s16 = vaddq_s16(q0s16, qAdd3);
  q1s16 = vaddq_s16(q1s16, qAdd3);

  q0s16 = vshrq_n_s16(q0s16, 3);
  q1s16 = vshrq_n_s16(q1s16, 3);

  // store
  vst1_lane_s16(mb_dqcoeff, vget_low_s16(q0s16), 0);
  mb_dqcoeff += 16;
  vst1_lane_s16(mb_dqcoeff, vget_high_s16(q0s16), 0);
  mb_dqcoeff += 16;
  vst1_lane_s16(mb_dqcoeff, vget_low_s16(q1s16), 0);
  mb_dqcoeff += 16;
  vst1_lane_s16(mb_dqcoeff, vget_high_s16(q1s16), 0);
  mb_dqcoeff += 16;

  vst1_lane_s16(mb_dqcoeff, vget_low_s16(q0s16), 1);
  mb_dqcoeff += 16;
  vst1_lane_s16(mb_dqcoeff, vget_high_s16(q0s16), 1);
  mb_dqcoeff += 16;
  vst1_lane_s16(mb_dqcoeff, vget_low_s16(q1s16), 1);
  mb_dqcoeff += 16;
  vst1_lane_s16(mb_dqcoeff, vget_high_s16(q1s16), 1);
  mb_dqcoeff += 16;

  vst1_lane_s16(mb_dqcoeff, vget_low_s16(q0s16), 2);
  mb_dqcoeff += 16;
  vst1_lane_s16(mb_dqcoeff, vget_high_s16(q0s16), 2);
  mb_dqcoeff += 16;
  vst1_lane_s16(mb_dqcoeff, vget_low_s16(q1s16), 2);
  mb_dqcoeff += 16;
  vst1_lane_s16(mb_dqcoeff, vget_high_s16(q1s16), 2);
  mb_dqcoeff += 16;

  vst1_lane_s16(mb_dqcoeff, vget_low_s16(q0s16), 3);
  mb_dqcoeff += 16;
  vst1_lane_s16(mb_dqcoeff, vget_high_s16(q0s16), 3);
  mb_dqcoeff += 16;
  vst1_lane_s16(mb_dqcoeff, vget_low_s16(q1s16), 3);
  mb_dqcoeff += 16;
  vst1_lane_s16(mb_dqcoeff, vget_high_s16(q1s16), 3);
  mb_dqcoeff += 16;
  return;
}
@@ -1,111 +0,0 @@
/*
 * Copyright (c) 2014 The WebM project authors. All Rights Reserved.
 *
 * Use of this source code is governed by a BSD-style license
 * that can be found in the LICENSE file in the root of the source
 * tree. An additional intellectual property rights grant can be found
 * in the file PATENTS. All contributing project authors may
 * be found in the AUTHORS file in the root of the source tree.
 */

#include <arm_neon.h>
#include "./vpx_config.h"

static INLINE void vp8_loop_filter_simple_horizontal_edge_neon(
    unsigned char *s,
    int p,
    const unsigned char *blimit) {
  uint8_t *sp;
  uint8x16_t qblimit, q0u8;
  uint8x16_t q5u8, q6u8, q7u8, q8u8, q9u8, q10u8, q14u8, q15u8;
  int16x8_t q2s16, q3s16, q13s16;
  int8x8_t d8s8, d9s8;
  int8x16_t q2s8, q3s8, q4s8, q10s8, q11s8, q14s8;

  qblimit = vdupq_n_u8(*blimit);

  sp = s - (p << 1);
  q5u8 = vld1q_u8(sp);
  sp += p;
  q6u8 = vld1q_u8(sp);
  sp += p;
  q7u8 = vld1q_u8(sp);
  sp += p;
  q8u8 = vld1q_u8(sp);
|
||||
|
||||
q15u8 = vabdq_u8(q6u8, q7u8);
|
||||
q14u8 = vabdq_u8(q5u8, q8u8);
|
||||
|
||||
q15u8 = vqaddq_u8(q15u8, q15u8);
|
||||
q14u8 = vshrq_n_u8(q14u8, 1);
|
||||
q0u8 = vdupq_n_u8(0x80);
|
||||
q13s16 = vdupq_n_s16(3);
|
||||
q15u8 = vqaddq_u8(q15u8, q14u8);
|
||||
|
||||
q5u8 = veorq_u8(q5u8, q0u8);
|
||||
q6u8 = veorq_u8(q6u8, q0u8);
|
||||
q7u8 = veorq_u8(q7u8, q0u8);
|
||||
q8u8 = veorq_u8(q8u8, q0u8);
|
||||
|
||||
q15u8 = vcgeq_u8(qblimit, q15u8);
|
||||
|
||||
q2s16 = vsubl_s8(vget_low_s8(vreinterpretq_s8_u8(q7u8)),
|
||||
vget_low_s8(vreinterpretq_s8_u8(q6u8)));
|
||||
q3s16 = vsubl_s8(vget_high_s8(vreinterpretq_s8_u8(q7u8)),
|
||||
vget_high_s8(vreinterpretq_s8_u8(q6u8)));
|
||||
|
||||
q4s8 = vqsubq_s8(vreinterpretq_s8_u8(q5u8),
|
||||
vreinterpretq_s8_u8(q8u8));
|
||||
|
||||
q2s16 = vmulq_s16(q2s16, q13s16);
|
||||
q3s16 = vmulq_s16(q3s16, q13s16);
|
||||
|
||||
q10u8 = vdupq_n_u8(3);
|
||||
q9u8 = vdupq_n_u8(4);
|
||||
|
||||
q2s16 = vaddw_s8(q2s16, vget_low_s8(q4s8));
|
||||
q3s16 = vaddw_s8(q3s16, vget_high_s8(q4s8));
|
||||
|
||||
d8s8 = vqmovn_s16(q2s16);
|
||||
d9s8 = vqmovn_s16(q3s16);
|
||||
q4s8 = vcombine_s8(d8s8, d9s8);
|
||||
|
||||
q14s8 = vandq_s8(q4s8, vreinterpretq_s8_u8(q15u8));
|
||||
|
||||
q2s8 = vqaddq_s8(q14s8, vreinterpretq_s8_u8(q10u8));
|
||||
q3s8 = vqaddq_s8(q14s8, vreinterpretq_s8_u8(q9u8));
|
||||
q2s8 = vshrq_n_s8(q2s8, 3);
|
||||
q3s8 = vshrq_n_s8(q3s8, 3);
|
||||
|
||||
q11s8 = vqaddq_s8(vreinterpretq_s8_u8(q6u8), q2s8);
|
||||
q10s8 = vqsubq_s8(vreinterpretq_s8_u8(q7u8), q3s8);
|
||||
|
||||
q6u8 = veorq_u8(vreinterpretq_u8_s8(q11s8), q0u8);
|
||||
q7u8 = veorq_u8(vreinterpretq_u8_s8(q10s8), q0u8);
|
||||
|
||||
vst1q_u8(s, q7u8);
|
||||
s -= p;
|
||||
vst1q_u8(s, q6u8);
|
||||
return;
|
||||
}
|
||||
|
||||
void vp8_loop_filter_bhs_neon(
|
||||
unsigned char *y_ptr,
|
||||
int y_stride,
|
||||
const unsigned char *blimit) {
|
||||
y_ptr += y_stride * 4;
|
||||
vp8_loop_filter_simple_horizontal_edge_neon(y_ptr, y_stride, blimit);
|
||||
y_ptr += y_stride * 4;
|
||||
vp8_loop_filter_simple_horizontal_edge_neon(y_ptr, y_stride, blimit);
|
||||
y_ptr += y_stride * 4;
|
||||
vp8_loop_filter_simple_horizontal_edge_neon(y_ptr, y_stride, blimit);
|
||||
return;
|
||||
}
|
||||
|
||||
void vp8_loop_filter_mbhs_neon(
|
||||
unsigned char *y_ptr,
|
||||
int y_stride,
|
||||
const unsigned char *blimit) {
|
||||
vp8_loop_filter_simple_horizontal_edge_neon(y_ptr, y_stride, blimit);
|
||||
return;
|
||||
}
|
||||
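The deleted file above is VP8's "simple" loop filter applied across horizontal edges: an activity mask (|p0 - q0| * 2 + |p1 - q1| / 2 compared against the block limit) gates a filter value clamp(clamp(p1 - q1) + 3 * (q0 - p0)), which is rounded two different ways (+4 and +3, then >> 3) and used to pull q0 and p0 toward each other. A scalar per-pixel sketch of the same arithmetic, with illustrative names; the NEON code processes 16 pixel columns per call:

#include <stdlib.h>  /* abs() */

/* Saturate to the signed 8-bit range, as the vqaddq_s8/vqsubq_s8 paths do. */
static signed char clamp_s8(int v) {
    return (signed char)(v < -128 ? -128 : (v > 127 ? 127 : v));
}

/* Filter one pixel column across the edge: p1 p0 | q0 q1. */
static void simple_filter_ref(unsigned char blimit,
                              unsigned char *op1, unsigned char *op0,
                              unsigned char *oq0, unsigned char *oq1) {
    /* Edge-activity mask: |p0 - q0| * 2 + |p1 - q1| / 2 <= blimit. */
    const int mask = (abs(*op0 - *oq0) * 2 + abs(*op1 - *oq1) / 2) <= blimit;
    /* Convert to signed by flipping the sign bit (the veorq_u8 with 0x80). */
    const signed char p1 = (signed char)(*op1 ^ 0x80);
    const signed char p0 = (signed char)(*op0 ^ 0x80);
    const signed char q0 = (signed char)(*oq0 ^ 0x80);
    const signed char q1 = (signed char)(*oq1 ^ 0x80);
    signed char filter, filter1, filter2;

    if (!mask) return;

    filter = clamp_s8(clamp_s8(p1 - q1) + 3 * (q0 - p0));

    /* The +4 / +3 split rounds the two sides of the edge differently. */
    filter1 = (signed char)(clamp_s8(filter + 4) >> 3);
    filter2 = (signed char)(clamp_s8(filter + 3) >> 3);

    *oq0 = (unsigned char)(clamp_s8(q0 - filter1) ^ 0x80);
    *op0 = (unsigned char)(clamp_s8(p0 + filter2) ^ 0x80);
}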
@@ -1,283 +0,0 @@
|
||||
/*
|
||||
* Copyright (c) 2014 The WebM project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#include <arm_neon.h>
|
||||
#include "./vpx_config.h"
|
||||
#include "vpx_ports/arm.h"
|
||||
|
||||
#ifdef VPX_INCOMPATIBLE_GCC
|
||||
static INLINE void write_2x4(unsigned char *dst, int pitch,
|
||||
const uint8x8x2_t result) {
|
||||
/*
|
||||
* uint8x8x2_t result
|
||||
00 01 02 03 | 04 05 06 07
|
||||
10 11 12 13 | 14 15 16 17
|
||||
---
|
||||
* after vtrn_u8
|
||||
00 10 02 12 | 04 14 06 16
|
||||
01 11 03 13 | 05 15 07 17
|
||||
*/
|
||||
const uint8x8x2_t r01_u8 = vtrn_u8(result.val[0],
|
||||
result.val[1]);
|
||||
const uint16x4_t x_0_4 = vreinterpret_u16_u8(r01_u8.val[0]);
|
||||
const uint16x4_t x_1_5 = vreinterpret_u16_u8(r01_u8.val[1]);
|
||||
vst1_lane_u16((uint16_t *)dst, x_0_4, 0);
|
||||
dst += pitch;
|
||||
vst1_lane_u16((uint16_t *)dst, x_1_5, 0);
|
||||
dst += pitch;
|
||||
vst1_lane_u16((uint16_t *)dst, x_0_4, 1);
|
||||
dst += pitch;
|
||||
vst1_lane_u16((uint16_t *)dst, x_1_5, 1);
|
||||
dst += pitch;
|
||||
vst1_lane_u16((uint16_t *)dst, x_0_4, 2);
|
||||
dst += pitch;
|
||||
vst1_lane_u16((uint16_t *)dst, x_1_5, 2);
|
||||
dst += pitch;
|
||||
vst1_lane_u16((uint16_t *)dst, x_0_4, 3);
|
||||
dst += pitch;
|
||||
vst1_lane_u16((uint16_t *)dst, x_1_5, 3);
|
||||
}
|
||||
|
||||
static INLINE void write_2x8(unsigned char *dst, int pitch,
|
||||
const uint8x8x2_t result,
|
||||
const uint8x8x2_t result2) {
|
||||
write_2x4(dst, pitch, result);
|
||||
dst += pitch * 8;
|
||||
write_2x4(dst, pitch, result2);
|
||||
}
|
||||
#else
|
||||
static INLINE void write_2x8(unsigned char *dst, int pitch,
|
||||
const uint8x8x2_t result,
|
||||
const uint8x8x2_t result2) {
|
||||
vst2_lane_u8(dst, result, 0);
|
||||
dst += pitch;
|
||||
vst2_lane_u8(dst, result, 1);
|
||||
dst += pitch;
|
||||
vst2_lane_u8(dst, result, 2);
|
||||
dst += pitch;
|
||||
vst2_lane_u8(dst, result, 3);
|
||||
dst += pitch;
|
||||
vst2_lane_u8(dst, result, 4);
|
||||
dst += pitch;
|
||||
vst2_lane_u8(dst, result, 5);
|
||||
dst += pitch;
|
||||
vst2_lane_u8(dst, result, 6);
|
||||
dst += pitch;
|
||||
vst2_lane_u8(dst, result, 7);
|
||||
dst += pitch;
|
||||
|
||||
vst2_lane_u8(dst, result2, 0);
|
||||
dst += pitch;
|
||||
vst2_lane_u8(dst, result2, 1);
|
||||
dst += pitch;
|
||||
vst2_lane_u8(dst, result2, 2);
|
||||
dst += pitch;
|
||||
vst2_lane_u8(dst, result2, 3);
|
||||
dst += pitch;
|
||||
vst2_lane_u8(dst, result2, 4);
|
||||
dst += pitch;
|
||||
vst2_lane_u8(dst, result2, 5);
|
||||
dst += pitch;
|
||||
vst2_lane_u8(dst, result2, 6);
|
||||
dst += pitch;
|
||||
vst2_lane_u8(dst, result2, 7);
|
||||
}
|
||||
#endif // VPX_INCOMPATIBLE_GCC
|
||||
|
||||
|
||||
#ifdef VPX_INCOMPATIBLE_GCC
|
||||
static INLINE
|
||||
uint8x8x4_t read_4x8(unsigned char *src, int pitch) {
|
||||
uint8x8x4_t x;
|
||||
const uint8x8_t a = vld1_u8(src);
|
||||
const uint8x8_t b = vld1_u8(src + pitch * 1);
|
||||
const uint8x8_t c = vld1_u8(src + pitch * 2);
|
||||
const uint8x8_t d = vld1_u8(src + pitch * 3);
|
||||
const uint8x8_t e = vld1_u8(src + pitch * 4);
|
||||
const uint8x8_t f = vld1_u8(src + pitch * 5);
|
||||
const uint8x8_t g = vld1_u8(src + pitch * 6);
|
||||
const uint8x8_t h = vld1_u8(src + pitch * 7);
|
||||
const uint32x2x2_t r04_u32 = vtrn_u32(vreinterpret_u32_u8(a),
|
||||
vreinterpret_u32_u8(e));
|
||||
const uint32x2x2_t r15_u32 = vtrn_u32(vreinterpret_u32_u8(b),
|
||||
vreinterpret_u32_u8(f));
|
||||
const uint32x2x2_t r26_u32 = vtrn_u32(vreinterpret_u32_u8(c),
|
||||
vreinterpret_u32_u8(g));
|
||||
const uint32x2x2_t r37_u32 = vtrn_u32(vreinterpret_u32_u8(d),
|
||||
vreinterpret_u32_u8(h));
|
||||
const uint16x4x2_t r02_u16 = vtrn_u16(vreinterpret_u16_u32(r04_u32.val[0]),
|
||||
vreinterpret_u16_u32(r26_u32.val[0]));
|
||||
const uint16x4x2_t r13_u16 = vtrn_u16(vreinterpret_u16_u32(r15_u32.val[0]),
|
||||
vreinterpret_u16_u32(r37_u32.val[0]));
|
||||
const uint8x8x2_t r01_u8 = vtrn_u8(vreinterpret_u8_u16(r02_u16.val[0]),
|
||||
vreinterpret_u8_u16(r13_u16.val[0]));
|
||||
const uint8x8x2_t r23_u8 = vtrn_u8(vreinterpret_u8_u16(r02_u16.val[1]),
|
||||
vreinterpret_u8_u16(r13_u16.val[1]));
|
||||
/*
|
||||
* after vtrn_u32
|
||||
00 01 02 03 | 40 41 42 43
|
||||
10 11 12 13 | 50 51 52 53
|
||||
20 21 22 23 | 60 61 62 63
|
||||
30 31 32 33 | 70 71 72 73
|
||||
---
|
||||
* after vtrn_u16
|
||||
00 01 20 21 | 40 41 60 61
|
||||
02 03 22 23 | 42 43 62 63
|
||||
10 11 30 31 | 50 51 70 71
|
||||
12 13 32 33 | 52 52 72 73
|
||||
|
||||
00 01 20 21 | 40 41 60 61
|
||||
10 11 30 31 | 50 51 70 71
|
||||
02 03 22 23 | 42 43 62 63
|
||||
12 13 32 33 | 52 52 72 73
|
||||
---
|
||||
* after vtrn_u8
|
||||
00 10 20 30 | 40 50 60 70
|
||||
01 11 21 31 | 41 51 61 71
|
||||
02 12 22 32 | 42 52 62 72
|
||||
03 13 23 33 | 43 53 63 73
|
||||
*/
|
||||
x.val[0] = r01_u8.val[0];
|
||||
x.val[1] = r01_u8.val[1];
|
||||
x.val[2] = r23_u8.val[0];
|
||||
x.val[3] = r23_u8.val[1];
|
||||
|
||||
return x;
|
||||
}
|
||||
#else
|
||||
static INLINE
|
||||
uint8x8x4_t read_4x8(unsigned char *src, int pitch) {
|
||||
uint8x8x4_t x;
|
||||
x.val[0] = x.val[1] = x.val[2] = x.val[3] = vdup_n_u8(0);
|
||||
x = vld4_lane_u8(src, x, 0);
|
||||
src += pitch;
|
||||
x = vld4_lane_u8(src, x, 1);
|
||||
src += pitch;
|
||||
x = vld4_lane_u8(src, x, 2);
|
||||
src += pitch;
|
||||
x = vld4_lane_u8(src, x, 3);
|
||||
src += pitch;
|
||||
x = vld4_lane_u8(src, x, 4);
|
||||
src += pitch;
|
||||
x = vld4_lane_u8(src, x, 5);
|
||||
src += pitch;
|
||||
x = vld4_lane_u8(src, x, 6);
|
||||
src += pitch;
|
||||
x = vld4_lane_u8(src, x, 7);
|
||||
return x;
|
||||
}
|
||||
#endif // VPX_INCOMPATIBLE_GCC
|
||||
|
||||
static INLINE void vp8_loop_filter_simple_vertical_edge_neon(
|
||||
unsigned char *s,
|
||||
int p,
|
||||
const unsigned char *blimit) {
|
||||
unsigned char *src1;
|
||||
uint8x16_t qblimit, q0u8;
|
||||
uint8x16_t q3u8, q4u8, q5u8, q6u8, q7u8, q11u8, q12u8, q14u8, q15u8;
|
||||
int16x8_t q2s16, q13s16, q11s16;
|
||||
int8x8_t d28s8, d29s8;
|
||||
int8x16_t q2s8, q3s8, q10s8, q11s8, q14s8;
|
||||
uint8x8x4_t d0u8x4; // d6, d7, d8, d9
|
||||
uint8x8x4_t d1u8x4; // d10, d11, d12, d13
|
||||
uint8x8x2_t d2u8x2; // d12, d13
|
||||
uint8x8x2_t d3u8x2; // d14, d15
|
||||
|
||||
qblimit = vdupq_n_u8(*blimit);
|
||||
|
||||
src1 = s - 2;
|
||||
d0u8x4 = read_4x8(src1, p);
|
||||
src1 += p * 8;
|
||||
d1u8x4 = read_4x8(src1, p);
|
||||
|
||||
q3u8 = vcombine_u8(d0u8x4.val[0], d1u8x4.val[0]); // d6 d10
|
||||
q4u8 = vcombine_u8(d0u8x4.val[2], d1u8x4.val[2]); // d8 d12
|
||||
q5u8 = vcombine_u8(d0u8x4.val[1], d1u8x4.val[1]); // d7 d11
|
||||
q6u8 = vcombine_u8(d0u8x4.val[3], d1u8x4.val[3]); // d9 d13
|
||||
|
||||
q15u8 = vabdq_u8(q5u8, q4u8);
|
||||
q14u8 = vabdq_u8(q3u8, q6u8);
|
||||
|
||||
q15u8 = vqaddq_u8(q15u8, q15u8);
|
||||
q14u8 = vshrq_n_u8(q14u8, 1);
|
||||
q0u8 = vdupq_n_u8(0x80);
|
||||
q11s16 = vdupq_n_s16(3);
|
||||
q15u8 = vqaddq_u8(q15u8, q14u8);
|
||||
|
||||
q3u8 = veorq_u8(q3u8, q0u8);
|
||||
q4u8 = veorq_u8(q4u8, q0u8);
|
||||
q5u8 = veorq_u8(q5u8, q0u8);
|
||||
q6u8 = veorq_u8(q6u8, q0u8);
|
||||
|
||||
q15u8 = vcgeq_u8(qblimit, q15u8);
|
||||
|
||||
q2s16 = vsubl_s8(vget_low_s8(vreinterpretq_s8_u8(q4u8)),
|
||||
vget_low_s8(vreinterpretq_s8_u8(q5u8)));
|
||||
q13s16 = vsubl_s8(vget_high_s8(vreinterpretq_s8_u8(q4u8)),
|
||||
vget_high_s8(vreinterpretq_s8_u8(q5u8)));
|
||||
|
||||
q14s8 = vqsubq_s8(vreinterpretq_s8_u8(q3u8),
|
||||
vreinterpretq_s8_u8(q6u8));
|
||||
|
||||
q2s16 = vmulq_s16(q2s16, q11s16);
|
||||
q13s16 = vmulq_s16(q13s16, q11s16);
|
||||
|
||||
q11u8 = vdupq_n_u8(3);
|
||||
q12u8 = vdupq_n_u8(4);
|
||||
|
||||
q2s16 = vaddw_s8(q2s16, vget_low_s8(q14s8));
|
||||
q13s16 = vaddw_s8(q13s16, vget_high_s8(q14s8));
|
||||
|
||||
d28s8 = vqmovn_s16(q2s16);
|
||||
d29s8 = vqmovn_s16(q13s16);
|
||||
q14s8 = vcombine_s8(d28s8, d29s8);
|
||||
|
||||
q14s8 = vandq_s8(q14s8, vreinterpretq_s8_u8(q15u8));
|
||||
|
||||
q2s8 = vqaddq_s8(q14s8, vreinterpretq_s8_u8(q11u8));
|
||||
q3s8 = vqaddq_s8(q14s8, vreinterpretq_s8_u8(q12u8));
|
||||
q2s8 = vshrq_n_s8(q2s8, 3);
|
||||
q14s8 = vshrq_n_s8(q3s8, 3);
|
||||
|
||||
q11s8 = vqaddq_s8(vreinterpretq_s8_u8(q5u8), q2s8);
|
||||
q10s8 = vqsubq_s8(vreinterpretq_s8_u8(q4u8), q14s8);
|
||||
|
||||
q6u8 = veorq_u8(vreinterpretq_u8_s8(q11s8), q0u8);
|
||||
q7u8 = veorq_u8(vreinterpretq_u8_s8(q10s8), q0u8);
|
||||
|
||||
d2u8x2.val[0] = vget_low_u8(q6u8); // d12
|
||||
d2u8x2.val[1] = vget_low_u8(q7u8); // d14
|
||||
d3u8x2.val[0] = vget_high_u8(q6u8); // d13
|
||||
d3u8x2.val[1] = vget_high_u8(q7u8); // d15
|
||||
|
||||
src1 = s - 1;
|
||||
write_2x8(src1, p, d2u8x2, d3u8x2);
|
||||
}
|
||||
|
||||
void vp8_loop_filter_bvs_neon(
|
||||
unsigned char *y_ptr,
|
||||
int y_stride,
|
||||
const unsigned char *blimit) {
|
||||
y_ptr += 4;
|
||||
vp8_loop_filter_simple_vertical_edge_neon(y_ptr, y_stride, blimit);
|
||||
y_ptr += 4;
|
||||
vp8_loop_filter_simple_vertical_edge_neon(y_ptr, y_stride, blimit);
|
||||
y_ptr += 4;
|
||||
vp8_loop_filter_simple_vertical_edge_neon(y_ptr, y_stride, blimit);
|
||||
return;
|
||||
}
|
||||
|
||||
void vp8_loop_filter_mbvs_neon(
|
||||
unsigned char *y_ptr,
|
||||
int y_stride,
|
||||
const unsigned char *blimit) {
|
||||
vp8_loop_filter_simple_vertical_edge_neon(y_ptr, y_stride, blimit);
|
||||
return;
|
||||
}
|
||||
@@ -1,625 +0,0 @@
|
||||
/*
|
||||
* Copyright (c) 2014 The WebM project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#include <arm_neon.h>
|
||||
#include "./vpx_config.h"
|
||||
|
||||
static INLINE void vp8_mbloop_filter_neon(
|
||||
uint8x16_t qblimit, // mblimit
|
||||
uint8x16_t qlimit, // limit
|
||||
uint8x16_t qthresh, // thresh
|
||||
uint8x16_t q3, // p2
|
||||
uint8x16_t q4, // p2
|
||||
uint8x16_t q5, // p1
|
||||
uint8x16_t q6, // p0
|
||||
uint8x16_t q7, // q0
|
||||
uint8x16_t q8, // q1
|
||||
uint8x16_t q9, // q2
|
||||
uint8x16_t q10, // q3
|
||||
uint8x16_t *q4r, // p1
|
||||
uint8x16_t *q5r, // p1
|
||||
uint8x16_t *q6r, // p0
|
||||
uint8x16_t *q7r, // q0
|
||||
uint8x16_t *q8r, // q1
|
||||
uint8x16_t *q9r) { // q1
|
||||
uint8x16_t q0u8, q1u8, q11u8, q12u8, q13u8, q14u8, q15u8;
|
||||
int16x8_t q0s16, q2s16, q11s16, q12s16, q13s16, q14s16, q15s16;
|
||||
int8x16_t q1s8, q6s8, q7s8, q2s8, q11s8, q13s8;
|
||||
uint16x8_t q0u16, q11u16, q12u16, q13u16, q14u16, q15u16;
|
||||
int8x16_t q0s8, q12s8, q14s8, q15s8;
|
||||
int8x8_t d0, d1, d2, d3, d4, d5, d24, d25, d28, d29;
|
||||
|
||||
q11u8 = vabdq_u8(q3, q4);
|
||||
q12u8 = vabdq_u8(q4, q5);
|
||||
q13u8 = vabdq_u8(q5, q6);
|
||||
q14u8 = vabdq_u8(q8, q7);
|
||||
q1u8 = vabdq_u8(q9, q8);
|
||||
q0u8 = vabdq_u8(q10, q9);
|
||||
|
||||
q11u8 = vmaxq_u8(q11u8, q12u8);
|
||||
q12u8 = vmaxq_u8(q13u8, q14u8);
|
||||
q1u8 = vmaxq_u8(q1u8, q0u8);
|
||||
q15u8 = vmaxq_u8(q11u8, q12u8);
|
||||
|
||||
q12u8 = vabdq_u8(q6, q7);
|
||||
|
||||
// vp8_hevmask
|
||||
q13u8 = vcgtq_u8(q13u8, qthresh);
|
||||
q14u8 = vcgtq_u8(q14u8, qthresh);
|
||||
q15u8 = vmaxq_u8(q15u8, q1u8);
|
||||
|
||||
q15u8 = vcgeq_u8(qlimit, q15u8);
|
||||
|
||||
q1u8 = vabdq_u8(q5, q8);
|
||||
q12u8 = vqaddq_u8(q12u8, q12u8);
|
||||
|
||||
// vp8_filter() function
|
||||
// convert to signed
|
||||
q0u8 = vdupq_n_u8(0x80);
|
||||
q9 = veorq_u8(q9, q0u8);
|
||||
q8 = veorq_u8(q8, q0u8);
|
||||
q7 = veorq_u8(q7, q0u8);
|
||||
q6 = veorq_u8(q6, q0u8);
|
||||
q5 = veorq_u8(q5, q0u8);
|
||||
q4 = veorq_u8(q4, q0u8);
|
||||
|
||||
q1u8 = vshrq_n_u8(q1u8, 1);
|
||||
q12u8 = vqaddq_u8(q12u8, q1u8);
|
||||
|
||||
q14u8 = vorrq_u8(q13u8, q14u8);
|
||||
q12u8 = vcgeq_u8(qblimit, q12u8);
|
||||
|
||||
q2s16 = vsubl_s8(vget_low_s8(vreinterpretq_s8_u8(q7)),
|
||||
vget_low_s8(vreinterpretq_s8_u8(q6)));
|
||||
q13s16 = vsubl_s8(vget_high_s8(vreinterpretq_s8_u8(q7)),
|
||||
vget_high_s8(vreinterpretq_s8_u8(q6)));
|
||||
|
||||
q1s8 = vqsubq_s8(vreinterpretq_s8_u8(q5),
|
||||
vreinterpretq_s8_u8(q8));
|
||||
|
||||
q11s16 = vdupq_n_s16(3);
|
||||
q2s16 = vmulq_s16(q2s16, q11s16);
|
||||
q13s16 = vmulq_s16(q13s16, q11s16);
|
||||
|
||||
q15u8 = vandq_u8(q15u8, q12u8);
|
||||
|
||||
q2s16 = vaddw_s8(q2s16, vget_low_s8(q1s8));
|
||||
q13s16 = vaddw_s8(q13s16, vget_high_s8(q1s8));
|
||||
|
||||
q12u8 = vdupq_n_u8(3);
|
||||
q11u8 = vdupq_n_u8(4);
|
||||
// vp8_filter = clamp(vp8_filter + 3 * ( qs0 - ps0))
|
||||
d2 = vqmovn_s16(q2s16);
|
||||
d3 = vqmovn_s16(q13s16);
|
||||
q1s8 = vcombine_s8(d2, d3);
|
||||
q1s8 = vandq_s8(q1s8, vreinterpretq_s8_u8(q15u8));
|
||||
q13s8 = vandq_s8(q1s8, vreinterpretq_s8_u8(q14u8));
|
||||
|
||||
q2s8 = vqaddq_s8(q13s8, vreinterpretq_s8_u8(q11u8));
|
||||
q13s8 = vqaddq_s8(q13s8, vreinterpretq_s8_u8(q12u8));
|
||||
q2s8 = vshrq_n_s8(q2s8, 3);
|
||||
q13s8 = vshrq_n_s8(q13s8, 3);
|
||||
|
||||
q7s8 = vqsubq_s8(vreinterpretq_s8_u8(q7), q2s8);
|
||||
q6s8 = vqaddq_s8(vreinterpretq_s8_u8(q6), q13s8);
|
||||
|
||||
q1s8 = vbicq_s8(q1s8, vreinterpretq_s8_u8(q14u8));
|
||||
|
||||
q0u16 = q11u16 = q12u16 = q13u16 = q14u16 = q15u16 = vdupq_n_u16(63);
|
||||
d5 = vdup_n_s8(9);
|
||||
d4 = vdup_n_s8(18);
|
||||
|
||||
q0s16 = vmlal_s8(vreinterpretq_s16_u16(q0u16), vget_low_s8(q1s8), d5);
|
||||
q11s16 = vmlal_s8(vreinterpretq_s16_u16(q11u16), vget_high_s8(q1s8), d5);
|
||||
d5 = vdup_n_s8(27);
|
||||
q12s16 = vmlal_s8(vreinterpretq_s16_u16(q12u16), vget_low_s8(q1s8), d4);
|
||||
q13s16 = vmlal_s8(vreinterpretq_s16_u16(q13u16), vget_high_s8(q1s8), d4);
|
||||
q14s16 = vmlal_s8(vreinterpretq_s16_u16(q14u16), vget_low_s8(q1s8), d5);
|
||||
q15s16 = vmlal_s8(vreinterpretq_s16_u16(q15u16), vget_high_s8(q1s8), d5);
|
||||
|
||||
d0 = vqshrn_n_s16(q0s16 , 7);
|
||||
d1 = vqshrn_n_s16(q11s16, 7);
|
||||
d24 = vqshrn_n_s16(q12s16, 7);
|
||||
d25 = vqshrn_n_s16(q13s16, 7);
|
||||
d28 = vqshrn_n_s16(q14s16, 7);
|
||||
d29 = vqshrn_n_s16(q15s16, 7);
|
||||
|
||||
q0s8 = vcombine_s8(d0, d1);
|
||||
q12s8 = vcombine_s8(d24, d25);
|
||||
q14s8 = vcombine_s8(d28, d29);
|
||||
|
||||
q11s8 = vqsubq_s8(vreinterpretq_s8_u8(q9), q0s8);
|
||||
q0s8 = vqaddq_s8(vreinterpretq_s8_u8(q4), q0s8);
|
||||
q13s8 = vqsubq_s8(vreinterpretq_s8_u8(q8), q12s8);
|
||||
q12s8 = vqaddq_s8(vreinterpretq_s8_u8(q5), q12s8);
|
||||
q15s8 = vqsubq_s8((q7s8), q14s8);
|
||||
q14s8 = vqaddq_s8((q6s8), q14s8);
|
||||
|
||||
q1u8 = vdupq_n_u8(0x80);
|
||||
*q9r = veorq_u8(vreinterpretq_u8_s8(q11s8), q1u8);
|
||||
*q8r = veorq_u8(vreinterpretq_u8_s8(q13s8), q1u8);
|
||||
*q7r = veorq_u8(vreinterpretq_u8_s8(q15s8), q1u8);
|
||||
*q6r = veorq_u8(vreinterpretq_u8_s8(q14s8), q1u8);
|
||||
*q5r = veorq_u8(vreinterpretq_u8_s8(q12s8), q1u8);
|
||||
*q4r = veorq_u8(vreinterpretq_u8_s8(q0s8), q1u8);
|
||||
return;
|
||||
}
|
||||
|
||||
void vp8_mbloop_filter_horizontal_edge_y_neon(
|
||||
unsigned char *src,
|
||||
int pitch,
|
||||
unsigned char blimit,
|
||||
unsigned char limit,
|
||||
unsigned char thresh) {
|
||||
uint8x16_t qblimit, qlimit, qthresh, q3, q4;
|
||||
uint8x16_t q5, q6, q7, q8, q9, q10;
|
||||
|
||||
qblimit = vdupq_n_u8(blimit);
|
||||
qlimit = vdupq_n_u8(limit);
|
||||
qthresh = vdupq_n_u8(thresh);
|
||||
|
||||
src -= (pitch << 2);
|
||||
|
||||
q3 = vld1q_u8(src);
|
||||
src += pitch;
|
||||
q4 = vld1q_u8(src);
|
||||
src += pitch;
|
||||
q5 = vld1q_u8(src);
|
||||
src += pitch;
|
||||
q6 = vld1q_u8(src);
|
||||
src += pitch;
|
||||
q7 = vld1q_u8(src);
|
||||
src += pitch;
|
||||
q8 = vld1q_u8(src);
|
||||
src += pitch;
|
||||
q9 = vld1q_u8(src);
|
||||
src += pitch;
|
||||
q10 = vld1q_u8(src);
|
||||
|
||||
vp8_mbloop_filter_neon(qblimit, qlimit, qthresh, q3, q4,
|
||||
q5, q6, q7, q8, q9, q10,
|
||||
&q4, &q5, &q6, &q7, &q8, &q9);
|
||||
|
||||
src -= (pitch * 6);
|
||||
vst1q_u8(src, q4);
|
||||
src += pitch;
|
||||
vst1q_u8(src, q5);
|
||||
src += pitch;
|
||||
vst1q_u8(src, q6);
|
||||
src += pitch;
|
||||
vst1q_u8(src, q7);
|
||||
src += pitch;
|
||||
vst1q_u8(src, q8);
|
||||
src += pitch;
|
||||
vst1q_u8(src, q9);
|
||||
return;
|
||||
}
|
||||
|
||||
void vp8_mbloop_filter_horizontal_edge_uv_neon(
|
||||
unsigned char *u,
|
||||
int pitch,
|
||||
unsigned char blimit,
|
||||
unsigned char limit,
|
||||
unsigned char thresh,
|
||||
unsigned char *v) {
|
||||
uint8x16_t qblimit, qlimit, qthresh, q3, q4;
|
||||
uint8x16_t q5, q6, q7, q8, q9, q10;
|
||||
uint8x8_t d6, d7, d8, d9, d10, d11, d12, d13, d14;
|
||||
uint8x8_t d15, d16, d17, d18, d19, d20, d21;
|
||||
|
||||
qblimit = vdupq_n_u8(blimit);
|
||||
qlimit = vdupq_n_u8(limit);
|
||||
qthresh = vdupq_n_u8(thresh);
|
||||
|
||||
u -= (pitch << 2);
|
||||
v -= (pitch << 2);
|
||||
|
||||
d6 = vld1_u8(u);
|
||||
u += pitch;
|
||||
d7 = vld1_u8(v);
|
||||
v += pitch;
|
||||
d8 = vld1_u8(u);
|
||||
u += pitch;
|
||||
d9 = vld1_u8(v);
|
||||
v += pitch;
|
||||
d10 = vld1_u8(u);
|
||||
u += pitch;
|
||||
d11 = vld1_u8(v);
|
||||
v += pitch;
|
||||
d12 = vld1_u8(u);
|
||||
u += pitch;
|
||||
d13 = vld1_u8(v);
|
||||
v += pitch;
|
||||
d14 = vld1_u8(u);
|
||||
u += pitch;
|
||||
d15 = vld1_u8(v);
|
||||
v += pitch;
|
||||
d16 = vld1_u8(u);
|
||||
u += pitch;
|
||||
d17 = vld1_u8(v);
|
||||
v += pitch;
|
||||
d18 = vld1_u8(u);
|
||||
u += pitch;
|
||||
d19 = vld1_u8(v);
|
||||
v += pitch;
|
||||
d20 = vld1_u8(u);
|
||||
d21 = vld1_u8(v);
|
||||
|
||||
q3 = vcombine_u8(d6, d7);
|
||||
q4 = vcombine_u8(d8, d9);
|
||||
q5 = vcombine_u8(d10, d11);
|
||||
q6 = vcombine_u8(d12, d13);
|
||||
q7 = vcombine_u8(d14, d15);
|
||||
q8 = vcombine_u8(d16, d17);
|
||||
q9 = vcombine_u8(d18, d19);
|
||||
q10 = vcombine_u8(d20, d21);
|
||||
|
||||
vp8_mbloop_filter_neon(qblimit, qlimit, qthresh, q3, q4,
|
||||
q5, q6, q7, q8, q9, q10,
|
||||
&q4, &q5, &q6, &q7, &q8, &q9);
|
||||
|
||||
u -= (pitch * 6);
|
||||
v -= (pitch * 6);
|
||||
vst1_u8(u, vget_low_u8(q4));
|
||||
u += pitch;
|
||||
vst1_u8(v, vget_high_u8(q4));
|
||||
v += pitch;
|
||||
vst1_u8(u, vget_low_u8(q5));
|
||||
u += pitch;
|
||||
vst1_u8(v, vget_high_u8(q5));
|
||||
v += pitch;
|
||||
vst1_u8(u, vget_low_u8(q6));
|
||||
u += pitch;
|
||||
vst1_u8(v, vget_high_u8(q6));
|
||||
v += pitch;
|
||||
vst1_u8(u, vget_low_u8(q7));
|
||||
u += pitch;
|
||||
vst1_u8(v, vget_high_u8(q7));
|
||||
v += pitch;
|
||||
vst1_u8(u, vget_low_u8(q8));
|
||||
u += pitch;
|
||||
vst1_u8(v, vget_high_u8(q8));
|
||||
v += pitch;
|
||||
vst1_u8(u, vget_low_u8(q9));
|
||||
vst1_u8(v, vget_high_u8(q9));
|
||||
return;
|
||||
}
|
||||
|
||||
void vp8_mbloop_filter_vertical_edge_y_neon(
|
||||
unsigned char *src,
|
||||
int pitch,
|
||||
unsigned char blimit,
|
||||
unsigned char limit,
|
||||
unsigned char thresh) {
|
||||
unsigned char *s1, *s2;
|
||||
uint8x16_t qblimit, qlimit, qthresh, q3, q4;
|
||||
uint8x16_t q5, q6, q7, q8, q9, q10;
|
||||
uint8x8_t d6, d7, d8, d9, d10, d11, d12, d13, d14;
|
||||
uint8x8_t d15, d16, d17, d18, d19, d20, d21;
|
||||
uint32x4x2_t q2tmp0, q2tmp1, q2tmp2, q2tmp3;
|
||||
uint16x8x2_t q2tmp4, q2tmp5, q2tmp6, q2tmp7;
|
||||
uint8x16x2_t q2tmp8, q2tmp9, q2tmp10, q2tmp11;
|
||||
|
||||
qblimit = vdupq_n_u8(blimit);
|
||||
qlimit = vdupq_n_u8(limit);
|
||||
qthresh = vdupq_n_u8(thresh);
|
||||
|
||||
s1 = src - 4;
|
||||
s2 = s1 + 8 * pitch;
|
||||
d6 = vld1_u8(s1);
|
||||
s1 += pitch;
|
||||
d7 = vld1_u8(s2);
|
||||
s2 += pitch;
|
||||
d8 = vld1_u8(s1);
|
||||
s1 += pitch;
|
||||
d9 = vld1_u8(s2);
|
||||
s2 += pitch;
|
||||
d10 = vld1_u8(s1);
|
||||
s1 += pitch;
|
||||
d11 = vld1_u8(s2);
|
||||
s2 += pitch;
|
||||
d12 = vld1_u8(s1);
|
||||
s1 += pitch;
|
||||
d13 = vld1_u8(s2);
|
||||
s2 += pitch;
|
||||
d14 = vld1_u8(s1);
|
||||
s1 += pitch;
|
||||
d15 = vld1_u8(s2);
|
||||
s2 += pitch;
|
||||
d16 = vld1_u8(s1);
|
||||
s1 += pitch;
|
||||
d17 = vld1_u8(s2);
|
||||
s2 += pitch;
|
||||
d18 = vld1_u8(s1);
|
||||
s1 += pitch;
|
||||
d19 = vld1_u8(s2);
|
||||
s2 += pitch;
|
||||
d20 = vld1_u8(s1);
|
||||
d21 = vld1_u8(s2);
|
||||
|
||||
q3 = vcombine_u8(d6, d7);
|
||||
q4 = vcombine_u8(d8, d9);
|
||||
q5 = vcombine_u8(d10, d11);
|
||||
q6 = vcombine_u8(d12, d13);
|
||||
q7 = vcombine_u8(d14, d15);
|
||||
q8 = vcombine_u8(d16, d17);
|
||||
q9 = vcombine_u8(d18, d19);
|
||||
q10 = vcombine_u8(d20, d21);
|
||||
|
||||
q2tmp0 = vtrnq_u32(vreinterpretq_u32_u8(q3), vreinterpretq_u32_u8(q7));
|
||||
q2tmp1 = vtrnq_u32(vreinterpretq_u32_u8(q4), vreinterpretq_u32_u8(q8));
|
||||
q2tmp2 = vtrnq_u32(vreinterpretq_u32_u8(q5), vreinterpretq_u32_u8(q9));
|
||||
q2tmp3 = vtrnq_u32(vreinterpretq_u32_u8(q6), vreinterpretq_u32_u8(q10));
|
||||
|
||||
q2tmp4 = vtrnq_u16(vreinterpretq_u16_u32(q2tmp0.val[0]),
|
||||
vreinterpretq_u16_u32(q2tmp2.val[0]));
|
||||
q2tmp5 = vtrnq_u16(vreinterpretq_u16_u32(q2tmp1.val[0]),
|
||||
vreinterpretq_u16_u32(q2tmp3.val[0]));
|
||||
q2tmp6 = vtrnq_u16(vreinterpretq_u16_u32(q2tmp0.val[1]),
|
||||
vreinterpretq_u16_u32(q2tmp2.val[1]));
|
||||
q2tmp7 = vtrnq_u16(vreinterpretq_u16_u32(q2tmp1.val[1]),
|
||||
vreinterpretq_u16_u32(q2tmp3.val[1]));
|
||||
|
||||
q2tmp8 = vtrnq_u8(vreinterpretq_u8_u16(q2tmp4.val[0]),
|
||||
vreinterpretq_u8_u16(q2tmp5.val[0]));
|
||||
q2tmp9 = vtrnq_u8(vreinterpretq_u8_u16(q2tmp4.val[1]),
|
||||
vreinterpretq_u8_u16(q2tmp5.val[1]));
|
||||
q2tmp10 = vtrnq_u8(vreinterpretq_u8_u16(q2tmp6.val[0]),
|
||||
vreinterpretq_u8_u16(q2tmp7.val[0]));
|
||||
q2tmp11 = vtrnq_u8(vreinterpretq_u8_u16(q2tmp6.val[1]),
|
||||
vreinterpretq_u8_u16(q2tmp7.val[1]));
|
||||
|
||||
q3 = q2tmp8.val[0];
|
||||
q4 = q2tmp8.val[1];
|
||||
q5 = q2tmp9.val[0];
|
||||
q6 = q2tmp9.val[1];
|
||||
q7 = q2tmp10.val[0];
|
||||
q8 = q2tmp10.val[1];
|
||||
q9 = q2tmp11.val[0];
|
||||
q10 = q2tmp11.val[1];
|
||||
|
||||
vp8_mbloop_filter_neon(qblimit, qlimit, qthresh, q3, q4,
|
||||
q5, q6, q7, q8, q9, q10,
|
||||
&q4, &q5, &q6, &q7, &q8, &q9);
|
||||
|
||||
q2tmp0 = vtrnq_u32(vreinterpretq_u32_u8(q3), vreinterpretq_u32_u8(q7));
|
||||
q2tmp1 = vtrnq_u32(vreinterpretq_u32_u8(q4), vreinterpretq_u32_u8(q8));
|
||||
q2tmp2 = vtrnq_u32(vreinterpretq_u32_u8(q5), vreinterpretq_u32_u8(q9));
|
||||
q2tmp3 = vtrnq_u32(vreinterpretq_u32_u8(q6), vreinterpretq_u32_u8(q10));
|
||||
|
||||
q2tmp4 = vtrnq_u16(vreinterpretq_u16_u32(q2tmp0.val[0]),
|
||||
vreinterpretq_u16_u32(q2tmp2.val[0]));
|
||||
q2tmp5 = vtrnq_u16(vreinterpretq_u16_u32(q2tmp1.val[0]),
|
||||
vreinterpretq_u16_u32(q2tmp3.val[0]));
|
||||
q2tmp6 = vtrnq_u16(vreinterpretq_u16_u32(q2tmp0.val[1]),
|
||||
vreinterpretq_u16_u32(q2tmp2.val[1]));
|
||||
q2tmp7 = vtrnq_u16(vreinterpretq_u16_u32(q2tmp1.val[1]),
|
||||
vreinterpretq_u16_u32(q2tmp3.val[1]));
|
||||
|
||||
q2tmp8 = vtrnq_u8(vreinterpretq_u8_u16(q2tmp4.val[0]),
|
||||
vreinterpretq_u8_u16(q2tmp5.val[0]));
|
||||
q2tmp9 = vtrnq_u8(vreinterpretq_u8_u16(q2tmp4.val[1]),
|
||||
vreinterpretq_u8_u16(q2tmp5.val[1]));
|
||||
q2tmp10 = vtrnq_u8(vreinterpretq_u8_u16(q2tmp6.val[0]),
|
||||
vreinterpretq_u8_u16(q2tmp7.val[0]));
|
||||
q2tmp11 = vtrnq_u8(vreinterpretq_u8_u16(q2tmp6.val[1]),
|
||||
vreinterpretq_u8_u16(q2tmp7.val[1]));
|
||||
|
||||
q3 = q2tmp8.val[0];
|
||||
q4 = q2tmp8.val[1];
|
||||
q5 = q2tmp9.val[0];
|
||||
q6 = q2tmp9.val[1];
|
||||
q7 = q2tmp10.val[0];
|
||||
q8 = q2tmp10.val[1];
|
||||
q9 = q2tmp11.val[0];
|
||||
q10 = q2tmp11.val[1];
|
||||
|
||||
s1 -= 7 * pitch;
|
||||
s2 -= 7 * pitch;
|
||||
|
||||
vst1_u8(s1, vget_low_u8(q3));
|
||||
s1 += pitch;
|
||||
vst1_u8(s2, vget_high_u8(q3));
|
||||
s2 += pitch;
|
||||
vst1_u8(s1, vget_low_u8(q4));
|
||||
s1 += pitch;
|
||||
vst1_u8(s2, vget_high_u8(q4));
|
||||
s2 += pitch;
|
||||
vst1_u8(s1, vget_low_u8(q5));
|
||||
s1 += pitch;
|
||||
vst1_u8(s2, vget_high_u8(q5));
|
||||
s2 += pitch;
|
||||
vst1_u8(s1, vget_low_u8(q6));
|
||||
s1 += pitch;
|
||||
vst1_u8(s2, vget_high_u8(q6));
|
||||
s2 += pitch;
|
||||
vst1_u8(s1, vget_low_u8(q7));
|
||||
s1 += pitch;
|
||||
vst1_u8(s2, vget_high_u8(q7));
|
||||
s2 += pitch;
|
||||
vst1_u8(s1, vget_low_u8(q8));
|
||||
s1 += pitch;
|
||||
vst1_u8(s2, vget_high_u8(q8));
|
||||
s2 += pitch;
|
||||
vst1_u8(s1, vget_low_u8(q9));
|
||||
s1 += pitch;
|
||||
vst1_u8(s2, vget_high_u8(q9));
|
||||
s2 += pitch;
|
||||
vst1_u8(s1, vget_low_u8(q10));
|
||||
vst1_u8(s2, vget_high_u8(q10));
|
||||
return;
|
||||
}
|
||||
|
||||
void vp8_mbloop_filter_vertical_edge_uv_neon(
|
||||
unsigned char *u,
|
||||
int pitch,
|
||||
unsigned char blimit,
|
||||
unsigned char limit,
|
||||
unsigned char thresh,
|
||||
unsigned char *v) {
|
||||
unsigned char *us, *ud;
|
||||
unsigned char *vs, *vd;
|
||||
uint8x16_t qblimit, qlimit, qthresh, q3, q4;
|
||||
uint8x16_t q5, q6, q7, q8, q9, q10;
|
||||
uint8x8_t d6, d7, d8, d9, d10, d11, d12, d13, d14;
|
||||
uint8x8_t d15, d16, d17, d18, d19, d20, d21;
|
||||
uint32x4x2_t q2tmp0, q2tmp1, q2tmp2, q2tmp3;
|
||||
uint16x8x2_t q2tmp4, q2tmp5, q2tmp6, q2tmp7;
|
||||
uint8x16x2_t q2tmp8, q2tmp9, q2tmp10, q2tmp11;
|
||||
|
||||
qblimit = vdupq_n_u8(blimit);
|
||||
qlimit = vdupq_n_u8(limit);
|
||||
qthresh = vdupq_n_u8(thresh);
|
||||
|
||||
us = u - 4;
|
||||
vs = v - 4;
|
||||
d6 = vld1_u8(us);
|
||||
us += pitch;
|
||||
d7 = vld1_u8(vs);
|
||||
vs += pitch;
|
||||
d8 = vld1_u8(us);
|
||||
us += pitch;
|
||||
d9 = vld1_u8(vs);
|
||||
vs += pitch;
|
||||
d10 = vld1_u8(us);
|
||||
us += pitch;
|
||||
d11 = vld1_u8(vs);
|
||||
vs += pitch;
|
||||
d12 = vld1_u8(us);
|
||||
us += pitch;
|
||||
d13 = vld1_u8(vs);
|
||||
vs += pitch;
|
||||
d14 = vld1_u8(us);
|
||||
us += pitch;
|
||||
d15 = vld1_u8(vs);
|
||||
vs += pitch;
|
||||
d16 = vld1_u8(us);
|
||||
us += pitch;
|
||||
d17 = vld1_u8(vs);
|
||||
vs += pitch;
|
||||
d18 = vld1_u8(us);
|
||||
us += pitch;
|
||||
d19 = vld1_u8(vs);
|
||||
vs += pitch;
|
||||
d20 = vld1_u8(us);
|
||||
d21 = vld1_u8(vs);
|
||||
|
||||
q3 = vcombine_u8(d6, d7);
|
||||
q4 = vcombine_u8(d8, d9);
|
||||
q5 = vcombine_u8(d10, d11);
|
||||
q6 = vcombine_u8(d12, d13);
|
||||
q7 = vcombine_u8(d14, d15);
|
||||
q8 = vcombine_u8(d16, d17);
|
||||
q9 = vcombine_u8(d18, d19);
|
||||
q10 = vcombine_u8(d20, d21);
|
||||
|
||||
q2tmp0 = vtrnq_u32(vreinterpretq_u32_u8(q3), vreinterpretq_u32_u8(q7));
|
||||
q2tmp1 = vtrnq_u32(vreinterpretq_u32_u8(q4), vreinterpretq_u32_u8(q8));
|
||||
q2tmp2 = vtrnq_u32(vreinterpretq_u32_u8(q5), vreinterpretq_u32_u8(q9));
|
||||
q2tmp3 = vtrnq_u32(vreinterpretq_u32_u8(q6), vreinterpretq_u32_u8(q10));
|
||||
|
||||
q2tmp4 = vtrnq_u16(vreinterpretq_u16_u32(q2tmp0.val[0]),
|
||||
vreinterpretq_u16_u32(q2tmp2.val[0]));
|
||||
q2tmp5 = vtrnq_u16(vreinterpretq_u16_u32(q2tmp1.val[0]),
|
||||
vreinterpretq_u16_u32(q2tmp3.val[0]));
|
||||
q2tmp6 = vtrnq_u16(vreinterpretq_u16_u32(q2tmp0.val[1]),
|
||||
vreinterpretq_u16_u32(q2tmp2.val[1]));
|
||||
q2tmp7 = vtrnq_u16(vreinterpretq_u16_u32(q2tmp1.val[1]),
|
||||
vreinterpretq_u16_u32(q2tmp3.val[1]));
|
||||
|
||||
q2tmp8 = vtrnq_u8(vreinterpretq_u8_u16(q2tmp4.val[0]),
|
||||
vreinterpretq_u8_u16(q2tmp5.val[0]));
|
||||
q2tmp9 = vtrnq_u8(vreinterpretq_u8_u16(q2tmp4.val[1]),
|
||||
vreinterpretq_u8_u16(q2tmp5.val[1]));
|
||||
q2tmp10 = vtrnq_u8(vreinterpretq_u8_u16(q2tmp6.val[0]),
|
||||
vreinterpretq_u8_u16(q2tmp7.val[0]));
|
||||
q2tmp11 = vtrnq_u8(vreinterpretq_u8_u16(q2tmp6.val[1]),
|
||||
vreinterpretq_u8_u16(q2tmp7.val[1]));
|
||||
|
||||
q3 = q2tmp8.val[0];
|
||||
q4 = q2tmp8.val[1];
|
||||
q5 = q2tmp9.val[0];
|
||||
q6 = q2tmp9.val[1];
|
||||
q7 = q2tmp10.val[0];
|
||||
q8 = q2tmp10.val[1];
|
||||
q9 = q2tmp11.val[0];
|
||||
q10 = q2tmp11.val[1];
|
||||
|
||||
vp8_mbloop_filter_neon(qblimit, qlimit, qthresh, q3, q4,
|
||||
q5, q6, q7, q8, q9, q10,
|
||||
&q4, &q5, &q6, &q7, &q8, &q9);
|
||||
|
||||
q2tmp0 = vtrnq_u32(vreinterpretq_u32_u8(q3), vreinterpretq_u32_u8(q7));
|
||||
q2tmp1 = vtrnq_u32(vreinterpretq_u32_u8(q4), vreinterpretq_u32_u8(q8));
|
||||
q2tmp2 = vtrnq_u32(vreinterpretq_u32_u8(q5), vreinterpretq_u32_u8(q9));
|
||||
q2tmp3 = vtrnq_u32(vreinterpretq_u32_u8(q6), vreinterpretq_u32_u8(q10));
|
||||
|
||||
q2tmp4 = vtrnq_u16(vreinterpretq_u16_u32(q2tmp0.val[0]),
|
||||
vreinterpretq_u16_u32(q2tmp2.val[0]));
|
||||
q2tmp5 = vtrnq_u16(vreinterpretq_u16_u32(q2tmp1.val[0]),
|
||||
vreinterpretq_u16_u32(q2tmp3.val[0]));
|
||||
q2tmp6 = vtrnq_u16(vreinterpretq_u16_u32(q2tmp0.val[1]),
|
||||
vreinterpretq_u16_u32(q2tmp2.val[1]));
|
||||
q2tmp7 = vtrnq_u16(vreinterpretq_u16_u32(q2tmp1.val[1]),
|
||||
vreinterpretq_u16_u32(q2tmp3.val[1]));
|
||||
|
||||
q2tmp8 = vtrnq_u8(vreinterpretq_u8_u16(q2tmp4.val[0]),
|
||||
vreinterpretq_u8_u16(q2tmp5.val[0]));
|
||||
q2tmp9 = vtrnq_u8(vreinterpretq_u8_u16(q2tmp4.val[1]),
|
||||
vreinterpretq_u8_u16(q2tmp5.val[1]));
|
||||
q2tmp10 = vtrnq_u8(vreinterpretq_u8_u16(q2tmp6.val[0]),
|
||||
vreinterpretq_u8_u16(q2tmp7.val[0]));
|
||||
q2tmp11 = vtrnq_u8(vreinterpretq_u8_u16(q2tmp6.val[1]),
|
||||
vreinterpretq_u8_u16(q2tmp7.val[1]));
|
||||
|
||||
q3 = q2tmp8.val[0];
|
||||
q4 = q2tmp8.val[1];
|
||||
q5 = q2tmp9.val[0];
|
||||
q6 = q2tmp9.val[1];
|
||||
q7 = q2tmp10.val[0];
|
||||
q8 = q2tmp10.val[1];
|
||||
q9 = q2tmp11.val[0];
|
||||
q10 = q2tmp11.val[1];
|
||||
|
||||
ud = u - 4;
|
||||
vst1_u8(ud, vget_low_u8(q3));
|
||||
ud += pitch;
|
||||
vst1_u8(ud, vget_low_u8(q4));
|
||||
ud += pitch;
|
||||
vst1_u8(ud, vget_low_u8(q5));
|
||||
ud += pitch;
|
||||
vst1_u8(ud, vget_low_u8(q6));
|
||||
ud += pitch;
|
||||
vst1_u8(ud, vget_low_u8(q7));
|
||||
ud += pitch;
|
||||
vst1_u8(ud, vget_low_u8(q8));
|
||||
ud += pitch;
|
||||
vst1_u8(ud, vget_low_u8(q9));
|
||||
ud += pitch;
|
||||
vst1_u8(ud, vget_low_u8(q10));
|
||||
|
||||
vd = v - 4;
|
||||
vst1_u8(vd, vget_high_u8(q3));
|
||||
vd += pitch;
|
||||
vst1_u8(vd, vget_high_u8(q4));
|
||||
vd += pitch;
|
||||
vst1_u8(vd, vget_high_u8(q5));
|
||||
vd += pitch;
|
||||
vst1_u8(vd, vget_high_u8(q6));
|
||||
vd += pitch;
|
||||
vst1_u8(vd, vget_high_u8(q7));
|
||||
vd += pitch;
|
||||
vst1_u8(vd, vget_high_u8(q8));
|
||||
vd += pitch;
|
||||
vst1_u8(vd, vget_high_u8(q9));
|
||||
vd += pitch;
|
||||
vst1_u8(vd, vget_high_u8(q10));
|
||||
return;
|
||||
}
|
||||
@@ -1,123 +0,0 @@
/*
 *  Copyright (c) 2014 The WebM project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */

#include <arm_neon.h>

static const int16_t cospi8sqrt2minus1 = 20091;
static const int16_t sinpi8sqrt2 = 35468;

void vp8_short_idct4x4llm_neon(
        int16_t *input,
        unsigned char *pred_ptr,
        int pred_stride,
        unsigned char *dst_ptr,
        int dst_stride) {
    int i;
    uint32x2_t d6u32 = vdup_n_u32(0);
    uint8x8_t d1u8;
    int16x4_t d2, d3, d4, d5, d10, d11, d12, d13;
    uint16x8_t q1u16;
    int16x8_t q1s16, q2s16, q3s16, q4s16;
    int32x2x2_t v2tmp0, v2tmp1;
    int16x4x2_t v2tmp2, v2tmp3;

    d2 = vld1_s16(input);
    d3 = vld1_s16(input + 4);
    d4 = vld1_s16(input + 8);
    d5 = vld1_s16(input + 12);

    // 1st for loop
    q1s16 = vcombine_s16(d2, d4);  // Swap d3 d4 here
    q2s16 = vcombine_s16(d3, d5);

    q3s16 = vqdmulhq_n_s16(q2s16, sinpi8sqrt2);
    q4s16 = vqdmulhq_n_s16(q2s16, cospi8sqrt2minus1);

    d12 = vqadd_s16(vget_low_s16(q1s16), vget_high_s16(q1s16));  // a1
    d13 = vqsub_s16(vget_low_s16(q1s16), vget_high_s16(q1s16));  // b1

    q3s16 = vshrq_n_s16(q3s16, 1);
    q4s16 = vshrq_n_s16(q4s16, 1);

    q3s16 = vqaddq_s16(q3s16, q2s16);
    q4s16 = vqaddq_s16(q4s16, q2s16);

    d10 = vqsub_s16(vget_low_s16(q3s16), vget_high_s16(q4s16));  // c1
    d11 = vqadd_s16(vget_high_s16(q3s16), vget_low_s16(q4s16));  // d1

    d2 = vqadd_s16(d12, d11);
    d3 = vqadd_s16(d13, d10);
    d4 = vqsub_s16(d13, d10);
    d5 = vqsub_s16(d12, d11);

    v2tmp0 = vtrn_s32(vreinterpret_s32_s16(d2), vreinterpret_s32_s16(d4));
    v2tmp1 = vtrn_s32(vreinterpret_s32_s16(d3), vreinterpret_s32_s16(d5));
    v2tmp2 = vtrn_s16(vreinterpret_s16_s32(v2tmp0.val[0]),
                      vreinterpret_s16_s32(v2tmp1.val[0]));
    v2tmp3 = vtrn_s16(vreinterpret_s16_s32(v2tmp0.val[1]),
                      vreinterpret_s16_s32(v2tmp1.val[1]));

    // 2nd for loop
    q1s16 = vcombine_s16(v2tmp2.val[0], v2tmp3.val[0]);
    q2s16 = vcombine_s16(v2tmp2.val[1], v2tmp3.val[1]);

    q3s16 = vqdmulhq_n_s16(q2s16, sinpi8sqrt2);
    q4s16 = vqdmulhq_n_s16(q2s16, cospi8sqrt2minus1);

    d12 = vqadd_s16(vget_low_s16(q1s16), vget_high_s16(q1s16));  // a1
    d13 = vqsub_s16(vget_low_s16(q1s16), vget_high_s16(q1s16));  // b1

    q3s16 = vshrq_n_s16(q3s16, 1);
    q4s16 = vshrq_n_s16(q4s16, 1);

    q3s16 = vqaddq_s16(q3s16, q2s16);
    q4s16 = vqaddq_s16(q4s16, q2s16);

    d10 = vqsub_s16(vget_low_s16(q3s16), vget_high_s16(q4s16));  // c1
    d11 = vqadd_s16(vget_high_s16(q3s16), vget_low_s16(q4s16));  // d1

    d2 = vqadd_s16(d12, d11);
    d3 = vqadd_s16(d13, d10);
    d4 = vqsub_s16(d13, d10);
    d5 = vqsub_s16(d12, d11);

    d2 = vrshr_n_s16(d2, 3);
    d3 = vrshr_n_s16(d3, 3);
    d4 = vrshr_n_s16(d4, 3);
    d5 = vrshr_n_s16(d5, 3);

    v2tmp0 = vtrn_s32(vreinterpret_s32_s16(d2), vreinterpret_s32_s16(d4));
    v2tmp1 = vtrn_s32(vreinterpret_s32_s16(d3), vreinterpret_s32_s16(d5));
    v2tmp2 = vtrn_s16(vreinterpret_s16_s32(v2tmp0.val[0]),
                      vreinterpret_s16_s32(v2tmp1.val[0]));
    v2tmp3 = vtrn_s16(vreinterpret_s16_s32(v2tmp0.val[1]),
                      vreinterpret_s16_s32(v2tmp1.val[1]));

    q1s16 = vcombine_s16(v2tmp2.val[0], v2tmp2.val[1]);
    q2s16 = vcombine_s16(v2tmp3.val[0], v2tmp3.val[1]);

    // dc_only_idct_add
    for (i = 0; i < 2; i++, q1s16 = q2s16) {
        d6u32 = vld1_lane_u32((const uint32_t *)pred_ptr, d6u32, 0);
        pred_ptr += pred_stride;
        d6u32 = vld1_lane_u32((const uint32_t *)pred_ptr, d6u32, 1);
        pred_ptr += pred_stride;

        q1u16 = vaddw_u8(vreinterpretq_u16_s16(q1s16),
                         vreinterpret_u8_u32(d6u32));
        d1u8 = vqmovun_s16(vreinterpretq_s16_u16(q1u16));

        vst1_lane_u32((uint32_t *)dst_ptr, vreinterpret_u32_u8(d1u8), 0);
        dst_ptr += dst_stride;
        vst1_lane_u32((uint32_t *)dst_ptr, vreinterpret_u32_u8(d1u8), 1);
        dst_ptr += dst_stride;
    }
    return;
}
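The two constants above are the VP8 IDCT multipliers in Q16 fixed point: sinpi8sqrt2 = 35468 is approximately sqrt(2)*sin(pi/8)*2^16 and cospi8sqrt2minus1 = 20091 is approximately (sqrt(2)*cos(pi/8) - 1)*2^16; the vqdmulhq_n_s16 doubling-high multiply followed by the >> 1 and the saturating add of the original value reproduce x*sqrt(2)*cos(pi/8) without leaving 16-bit arithmetic. A scalar sketch of one 1-D pass of the 4x4 inverse transform under those constants; the helper name is illustrative:

#include <stdint.h>

/* One 1-D pass of the VP8 4x4 inverse DCT over x0..x3 (a column or a row).
 * The (v * K) >> 16 products mirror the vqdmulhq_n_s16 / vshrq_n_s16(., 1) /
 * vqaddq_s16 sequence above. */
static void idct4_1d_ref(const int16_t x[4], int out[4]) {
    const int a1 = x[0] + x[2];
    const int b1 = x[0] - x[2];
    /* 35468 = sqrt(2)*sin(pi/8) in Q16, 20091 = sqrt(2)*cos(pi/8) - 1 in Q16. */
    const int c1 = ((x[1] * 35468) >> 16) - (x[3] + ((x[3] * 20091) >> 16));
    const int d1 = (x[1] + ((x[1] * 20091) >> 16)) + ((x[3] * 35468) >> 16);

    out[0] = a1 + d1;
    out[1] = b1 + c1;
    out[2] = b1 - c1;
    out[3] = a1 - d1;
}

After both passes the NEON code applies vrshr_n_s16(., 3), i.e. the rounding (v + 4) >> 3, before adding the result to the predictor block.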
File diff suppressed because it is too large
@@ -1,550 +0,0 @@
|
||||
/*
|
||||
* Copyright (c) 2014 The WebM project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#include <arm_neon.h>
|
||||
#include "./vpx_config.h"
|
||||
#include "vpx_ports/arm.h"
|
||||
|
||||
static INLINE void vp8_loop_filter_neon(
|
||||
uint8x16_t qblimit, // flimit
|
||||
uint8x16_t qlimit, // limit
|
||||
uint8x16_t qthresh, // thresh
|
||||
uint8x16_t q3, // p3
|
||||
uint8x16_t q4, // p2
|
||||
uint8x16_t q5, // p1
|
||||
uint8x16_t q6, // p0
|
||||
uint8x16_t q7, // q0
|
||||
uint8x16_t q8, // q1
|
||||
uint8x16_t q9, // q2
|
||||
uint8x16_t q10, // q3
|
||||
uint8x16_t *q5r, // p1
|
||||
uint8x16_t *q6r, // p0
|
||||
uint8x16_t *q7r, // q0
|
||||
uint8x16_t *q8r) { // q1
|
||||
uint8x16_t q0u8, q1u8, q2u8, q11u8, q12u8, q13u8, q14u8, q15u8;
|
||||
int16x8_t q2s16, q11s16;
|
||||
uint16x8_t q4u16;
|
||||
int8x16_t q1s8, q2s8, q10s8, q11s8, q12s8, q13s8;
|
||||
int8x8_t d2s8, d3s8;
|
||||
|
||||
q11u8 = vabdq_u8(q3, q4);
|
||||
q12u8 = vabdq_u8(q4, q5);
|
||||
q13u8 = vabdq_u8(q5, q6);
|
||||
q14u8 = vabdq_u8(q8, q7);
|
||||
q3 = vabdq_u8(q9, q8);
|
||||
q4 = vabdq_u8(q10, q9);
|
||||
|
||||
q11u8 = vmaxq_u8(q11u8, q12u8);
|
||||
q12u8 = vmaxq_u8(q13u8, q14u8);
|
||||
q3 = vmaxq_u8(q3, q4);
|
||||
q15u8 = vmaxq_u8(q11u8, q12u8);
|
||||
|
||||
q9 = vabdq_u8(q6, q7);
|
||||
|
||||
// vp8_hevmask
|
||||
q13u8 = vcgtq_u8(q13u8, qthresh);
|
||||
q14u8 = vcgtq_u8(q14u8, qthresh);
|
||||
q15u8 = vmaxq_u8(q15u8, q3);
|
||||
|
||||
q2u8 = vabdq_u8(q5, q8);
|
||||
q9 = vqaddq_u8(q9, q9);
|
||||
|
||||
q15u8 = vcgeq_u8(qlimit, q15u8);
|
||||
|
||||
// vp8_filter() function
|
||||
// convert to signed
|
||||
q10 = vdupq_n_u8(0x80);
|
||||
q8 = veorq_u8(q8, q10);
|
||||
q7 = veorq_u8(q7, q10);
|
||||
q6 = veorq_u8(q6, q10);
|
||||
q5 = veorq_u8(q5, q10);
|
||||
|
||||
q2u8 = vshrq_n_u8(q2u8, 1);
|
||||
q9 = vqaddq_u8(q9, q2u8);
|
||||
|
||||
q10 = vdupq_n_u8(3);
|
||||
|
||||
q2s16 = vsubl_s8(vget_low_s8(vreinterpretq_s8_u8(q7)),
|
||||
vget_low_s8(vreinterpretq_s8_u8(q6)));
|
||||
q11s16 = vsubl_s8(vget_high_s8(vreinterpretq_s8_u8(q7)),
|
||||
vget_high_s8(vreinterpretq_s8_u8(q6)));
|
||||
|
||||
q9 = vcgeq_u8(qblimit, q9);
|
||||
|
||||
q1s8 = vqsubq_s8(vreinterpretq_s8_u8(q5),
|
||||
vreinterpretq_s8_u8(q8));
|
||||
|
||||
q14u8 = vorrq_u8(q13u8, q14u8);
|
||||
|
||||
q4u16 = vmovl_u8(vget_low_u8(q10));
|
||||
q2s16 = vmulq_s16(q2s16, vreinterpretq_s16_u16(q4u16));
|
||||
q11s16 = vmulq_s16(q11s16, vreinterpretq_s16_u16(q4u16));
|
||||
|
||||
q1u8 = vandq_u8(vreinterpretq_u8_s8(q1s8), q14u8);
|
||||
q15u8 = vandq_u8(q15u8, q9);
|
||||
|
||||
q1s8 = vreinterpretq_s8_u8(q1u8);
|
||||
q2s16 = vaddw_s8(q2s16, vget_low_s8(q1s8));
|
||||
q11s16 = vaddw_s8(q11s16, vget_high_s8(q1s8));
|
||||
|
||||
q9 = vdupq_n_u8(4);
|
||||
// vp8_filter = clamp(vp8_filter + 3 * ( qs0 - ps0))
|
||||
d2s8 = vqmovn_s16(q2s16);
|
||||
d3s8 = vqmovn_s16(q11s16);
|
||||
q1s8 = vcombine_s8(d2s8, d3s8);
|
||||
q1u8 = vandq_u8(vreinterpretq_u8_s8(q1s8), q15u8);
|
||||
q1s8 = vreinterpretq_s8_u8(q1u8);
|
||||
|
||||
q2s8 = vqaddq_s8(q1s8, vreinterpretq_s8_u8(q10));
|
||||
q1s8 = vqaddq_s8(q1s8, vreinterpretq_s8_u8(q9));
|
||||
q2s8 = vshrq_n_s8(q2s8, 3);
|
||||
q1s8 = vshrq_n_s8(q1s8, 3);
|
||||
|
||||
q11s8 = vqaddq_s8(vreinterpretq_s8_u8(q6), q2s8);
|
||||
q10s8 = vqsubq_s8(vreinterpretq_s8_u8(q7), q1s8);
|
||||
|
||||
q1s8 = vrshrq_n_s8(q1s8, 1);
|
||||
q1s8 = vbicq_s8(q1s8, vreinterpretq_s8_u8(q14u8));
|
||||
|
||||
q13s8 = vqaddq_s8(vreinterpretq_s8_u8(q5), q1s8);
|
||||
q12s8 = vqsubq_s8(vreinterpretq_s8_u8(q8), q1s8);
|
||||
|
||||
q0u8 = vdupq_n_u8(0x80);
|
||||
*q8r = veorq_u8(vreinterpretq_u8_s8(q12s8), q0u8);
|
||||
*q7r = veorq_u8(vreinterpretq_u8_s8(q10s8), q0u8);
|
||||
*q6r = veorq_u8(vreinterpretq_u8_s8(q11s8), q0u8);
|
||||
*q5r = veorq_u8(vreinterpretq_u8_s8(q13s8), q0u8);
|
||||
return;
|
||||
}
|
||||
|
||||
void vp8_loop_filter_horizontal_edge_y_neon(
|
||||
unsigned char *src,
|
||||
int pitch,
|
||||
unsigned char blimit,
|
||||
unsigned char limit,
|
||||
unsigned char thresh) {
|
||||
uint8x16_t qblimit, qlimit, qthresh, q3, q4;
|
||||
uint8x16_t q5, q6, q7, q8, q9, q10;
|
||||
|
||||
qblimit = vdupq_n_u8(blimit);
|
||||
qlimit = vdupq_n_u8(limit);
|
||||
qthresh = vdupq_n_u8(thresh);
|
||||
src -= (pitch << 2);
|
||||
|
||||
q3 = vld1q_u8(src);
|
||||
src += pitch;
|
||||
q4 = vld1q_u8(src);
|
||||
src += pitch;
|
||||
q5 = vld1q_u8(src);
|
||||
src += pitch;
|
||||
q6 = vld1q_u8(src);
|
||||
src += pitch;
|
||||
q7 = vld1q_u8(src);
|
||||
src += pitch;
|
||||
q8 = vld1q_u8(src);
|
||||
src += pitch;
|
||||
q9 = vld1q_u8(src);
|
||||
src += pitch;
|
||||
q10 = vld1q_u8(src);
|
||||
|
||||
vp8_loop_filter_neon(qblimit, qlimit, qthresh, q3, q4,
|
||||
q5, q6, q7, q8, q9, q10,
|
||||
&q5, &q6, &q7, &q8);
|
||||
|
||||
src -= (pitch * 5);
|
||||
vst1q_u8(src, q5);
|
||||
src += pitch;
|
||||
vst1q_u8(src, q6);
|
||||
src += pitch;
|
||||
vst1q_u8(src, q7);
|
||||
src += pitch;
|
||||
vst1q_u8(src, q8);
|
||||
return;
|
||||
}
|
||||
|
||||
void vp8_loop_filter_horizontal_edge_uv_neon(
|
||||
unsigned char *u,
|
||||
int pitch,
|
||||
unsigned char blimit,
|
||||
unsigned char limit,
|
||||
unsigned char thresh,
|
||||
unsigned char *v) {
|
||||
uint8x16_t qblimit, qlimit, qthresh, q3, q4;
|
||||
uint8x16_t q5, q6, q7, q8, q9, q10;
|
||||
uint8x8_t d6, d7, d8, d9, d10, d11, d12, d13, d14;
|
||||
uint8x8_t d15, d16, d17, d18, d19, d20, d21;
|
||||
|
||||
qblimit = vdupq_n_u8(blimit);
|
||||
qlimit = vdupq_n_u8(limit);
|
||||
qthresh = vdupq_n_u8(thresh);
|
||||
|
||||
u -= (pitch << 2);
|
||||
v -= (pitch << 2);
|
||||
|
||||
d6 = vld1_u8(u);
|
||||
u += pitch;
|
||||
d7 = vld1_u8(v);
|
||||
v += pitch;
|
||||
d8 = vld1_u8(u);
|
||||
u += pitch;
|
||||
d9 = vld1_u8(v);
|
||||
v += pitch;
|
||||
d10 = vld1_u8(u);
|
||||
u += pitch;
|
||||
d11 = vld1_u8(v);
|
||||
v += pitch;
|
||||
d12 = vld1_u8(u);
|
||||
u += pitch;
|
||||
d13 = vld1_u8(v);
|
||||
v += pitch;
|
||||
d14 = vld1_u8(u);
|
||||
u += pitch;
|
||||
d15 = vld1_u8(v);
|
||||
v += pitch;
|
||||
d16 = vld1_u8(u);
|
||||
u += pitch;
|
||||
d17 = vld1_u8(v);
|
||||
v += pitch;
|
||||
d18 = vld1_u8(u);
|
||||
u += pitch;
|
||||
d19 = vld1_u8(v);
|
||||
v += pitch;
|
||||
d20 = vld1_u8(u);
|
||||
d21 = vld1_u8(v);
|
||||
|
||||
q3 = vcombine_u8(d6, d7);
|
||||
q4 = vcombine_u8(d8, d9);
|
||||
q5 = vcombine_u8(d10, d11);
|
||||
q6 = vcombine_u8(d12, d13);
|
||||
q7 = vcombine_u8(d14, d15);
|
||||
q8 = vcombine_u8(d16, d17);
|
||||
q9 = vcombine_u8(d18, d19);
|
||||
q10 = vcombine_u8(d20, d21);
|
||||
|
||||
vp8_loop_filter_neon(qblimit, qlimit, qthresh, q3, q4,
|
||||
q5, q6, q7, q8, q9, q10,
|
||||
&q5, &q6, &q7, &q8);
|
||||
|
||||
u -= (pitch * 5);
|
||||
vst1_u8(u, vget_low_u8(q5));
|
||||
u += pitch;
|
||||
vst1_u8(u, vget_low_u8(q6));
|
||||
u += pitch;
|
||||
vst1_u8(u, vget_low_u8(q7));
|
||||
u += pitch;
|
||||
vst1_u8(u, vget_low_u8(q8));
|
||||
|
||||
v -= (pitch * 5);
|
||||
vst1_u8(v, vget_high_u8(q5));
|
||||
v += pitch;
|
||||
vst1_u8(v, vget_high_u8(q6));
|
||||
v += pitch;
|
||||
vst1_u8(v, vget_high_u8(q7));
|
||||
v += pitch;
|
||||
vst1_u8(v, vget_high_u8(q8));
|
||||
return;
|
||||
}
|
||||
|
||||
static INLINE void write_4x8(unsigned char *dst, int pitch,
|
||||
const uint8x8x4_t result) {
|
||||
#ifdef VPX_INCOMPATIBLE_GCC
|
||||
/*
|
||||
* uint8x8x4_t result
|
||||
00 01 02 03 | 04 05 06 07
|
||||
10 11 12 13 | 14 15 16 17
|
||||
20 21 22 23 | 24 25 26 27
|
||||
30 31 32 33 | 34 35 36 37
|
||||
---
|
||||
* after vtrn_u16
|
||||
00 01 20 21 | 04 05 24 25
|
||||
02 03 22 23 | 06 07 26 27
|
||||
10 11 30 31 | 14 15 34 35
|
||||
12 13 32 33 | 16 17 36 37
|
||||
---
|
||||
* after vtrn_u8
|
||||
00 10 20 30 | 04 14 24 34
|
||||
01 11 21 31 | 05 15 25 35
|
||||
02 12 22 32 | 06 16 26 36
|
||||
03 13 23 33 | 07 17 27 37
|
||||
*/
|
||||
const uint16x4x2_t r02_u16 = vtrn_u16(vreinterpret_u16_u8(result.val[0]),
|
||||
vreinterpret_u16_u8(result.val[2]));
|
||||
const uint16x4x2_t r13_u16 = vtrn_u16(vreinterpret_u16_u8(result.val[1]),
|
||||
vreinterpret_u16_u8(result.val[3]));
|
||||
const uint8x8x2_t r01_u8 = vtrn_u8(vreinterpret_u8_u16(r02_u16.val[0]),
|
||||
vreinterpret_u8_u16(r13_u16.val[0]));
|
||||
const uint8x8x2_t r23_u8 = vtrn_u8(vreinterpret_u8_u16(r02_u16.val[1]),
|
||||
vreinterpret_u8_u16(r13_u16.val[1]));
|
||||
const uint32x2_t x_0_4 = vreinterpret_u32_u8(r01_u8.val[0]);
|
||||
const uint32x2_t x_1_5 = vreinterpret_u32_u8(r01_u8.val[1]);
|
||||
const uint32x2_t x_2_6 = vreinterpret_u32_u8(r23_u8.val[0]);
|
||||
const uint32x2_t x_3_7 = vreinterpret_u32_u8(r23_u8.val[1]);
|
||||
vst1_lane_u32((uint32_t *)dst, x_0_4, 0);
|
||||
dst += pitch;
|
||||
vst1_lane_u32((uint32_t *)dst, x_1_5, 0);
|
||||
dst += pitch;
|
||||
vst1_lane_u32((uint32_t *)dst, x_2_6, 0);
|
||||
dst += pitch;
|
||||
vst1_lane_u32((uint32_t *)dst, x_3_7, 0);
|
||||
dst += pitch;
|
||||
vst1_lane_u32((uint32_t *)dst, x_0_4, 1);
|
||||
dst += pitch;
|
||||
vst1_lane_u32((uint32_t *)dst, x_1_5, 1);
|
||||
dst += pitch;
|
||||
vst1_lane_u32((uint32_t *)dst, x_2_6, 1);
|
||||
dst += pitch;
|
||||
vst1_lane_u32((uint32_t *)dst, x_3_7, 1);
|
||||
#else
|
||||
vst4_lane_u8(dst, result, 0);
|
||||
dst += pitch;
|
||||
vst4_lane_u8(dst, result, 1);
|
||||
dst += pitch;
|
||||
vst4_lane_u8(dst, result, 2);
|
||||
dst += pitch;
|
||||
vst4_lane_u8(dst, result, 3);
|
||||
dst += pitch;
|
||||
vst4_lane_u8(dst, result, 4);
|
||||
dst += pitch;
|
||||
vst4_lane_u8(dst, result, 5);
|
||||
dst += pitch;
|
||||
vst4_lane_u8(dst, result, 6);
|
||||
dst += pitch;
|
||||
vst4_lane_u8(dst, result, 7);
|
||||
#endif // VPX_INCOMPATIBLE_GCC
|
||||
}
|
||||
|
||||
void vp8_loop_filter_vertical_edge_y_neon(
|
||||
unsigned char *src,
|
||||
int pitch,
|
||||
unsigned char blimit,
|
||||
unsigned char limit,
|
||||
unsigned char thresh) {
|
||||
unsigned char *s, *d;
|
||||
uint8x16_t qblimit, qlimit, qthresh, q3, q4;
|
||||
uint8x16_t q5, q6, q7, q8, q9, q10;
|
||||
uint8x8_t d6, d7, d8, d9, d10, d11, d12, d13, d14;
|
||||
uint8x8_t d15, d16, d17, d18, d19, d20, d21;
|
||||
uint32x4x2_t q2tmp0, q2tmp1, q2tmp2, q2tmp3;
|
||||
uint16x8x2_t q2tmp4, q2tmp5, q2tmp6, q2tmp7;
|
||||
uint8x16x2_t q2tmp8, q2tmp9, q2tmp10, q2tmp11;
|
||||
uint8x8x4_t q4ResultH, q4ResultL;
|
||||
|
||||
qblimit = vdupq_n_u8(blimit);
|
||||
qlimit = vdupq_n_u8(limit);
|
||||
qthresh = vdupq_n_u8(thresh);
|
||||
|
||||
s = src - 4;
|
||||
d6 = vld1_u8(s);
|
||||
s += pitch;
|
||||
d8 = vld1_u8(s);
|
||||
s += pitch;
|
||||
d10 = vld1_u8(s);
|
||||
s += pitch;
|
||||
d12 = vld1_u8(s);
|
||||
s += pitch;
|
||||
d14 = vld1_u8(s);
|
||||
s += pitch;
|
||||
d16 = vld1_u8(s);
|
||||
s += pitch;
|
||||
d18 = vld1_u8(s);
|
||||
s += pitch;
|
||||
d20 = vld1_u8(s);
|
||||
s += pitch;
|
||||
d7 = vld1_u8(s);
|
||||
s += pitch;
|
||||
d9 = vld1_u8(s);
|
||||
s += pitch;
|
||||
d11 = vld1_u8(s);
|
||||
s += pitch;
|
||||
d13 = vld1_u8(s);
|
||||
s += pitch;
|
||||
d15 = vld1_u8(s);
|
||||
s += pitch;
|
||||
d17 = vld1_u8(s);
|
||||
s += pitch;
|
||||
d19 = vld1_u8(s);
|
||||
s += pitch;
|
||||
d21 = vld1_u8(s);
|
||||
|
||||
q3 = vcombine_u8(d6, d7);
|
||||
q4 = vcombine_u8(d8, d9);
|
||||
q5 = vcombine_u8(d10, d11);
|
||||
q6 = vcombine_u8(d12, d13);
|
||||
q7 = vcombine_u8(d14, d15);
|
||||
q8 = vcombine_u8(d16, d17);
|
||||
q9 = vcombine_u8(d18, d19);
|
||||
q10 = vcombine_u8(d20, d21);
|
||||
|
||||
q2tmp0 = vtrnq_u32(vreinterpretq_u32_u8(q3), vreinterpretq_u32_u8(q7));
|
||||
q2tmp1 = vtrnq_u32(vreinterpretq_u32_u8(q4), vreinterpretq_u32_u8(q8));
|
||||
q2tmp2 = vtrnq_u32(vreinterpretq_u32_u8(q5), vreinterpretq_u32_u8(q9));
|
||||
q2tmp3 = vtrnq_u32(vreinterpretq_u32_u8(q6), vreinterpretq_u32_u8(q10));
|
||||
|
||||
q2tmp4 = vtrnq_u16(vreinterpretq_u16_u32(q2tmp0.val[0]),
|
||||
vreinterpretq_u16_u32(q2tmp2.val[0]));
|
||||
q2tmp5 = vtrnq_u16(vreinterpretq_u16_u32(q2tmp1.val[0]),
|
||||
vreinterpretq_u16_u32(q2tmp3.val[0]));
|
||||
q2tmp6 = vtrnq_u16(vreinterpretq_u16_u32(q2tmp0.val[1]),
|
||||
vreinterpretq_u16_u32(q2tmp2.val[1]));
|
||||
q2tmp7 = vtrnq_u16(vreinterpretq_u16_u32(q2tmp1.val[1]),
|
||||
vreinterpretq_u16_u32(q2tmp3.val[1]));
|
||||
|
||||
q2tmp8 = vtrnq_u8(vreinterpretq_u8_u16(q2tmp4.val[0]),
|
||||
vreinterpretq_u8_u16(q2tmp5.val[0]));
|
||||
q2tmp9 = vtrnq_u8(vreinterpretq_u8_u16(q2tmp4.val[1]),
|
||||
vreinterpretq_u8_u16(q2tmp5.val[1]));
|
||||
q2tmp10 = vtrnq_u8(vreinterpretq_u8_u16(q2tmp6.val[0]),
|
||||
vreinterpretq_u8_u16(q2tmp7.val[0]));
|
||||
q2tmp11 = vtrnq_u8(vreinterpretq_u8_u16(q2tmp6.val[1]),
|
||||
vreinterpretq_u8_u16(q2tmp7.val[1]));
|
||||
|
||||
q3 = q2tmp8.val[0];
|
||||
q4 = q2tmp8.val[1];
|
||||
q5 = q2tmp9.val[0];
|
||||
q6 = q2tmp9.val[1];
|
||||
q7 = q2tmp10.val[0];
|
||||
q8 = q2tmp10.val[1];
|
||||
q9 = q2tmp11.val[0];
|
||||
q10 = q2tmp11.val[1];
|
||||
|
||||
vp8_loop_filter_neon(qblimit, qlimit, qthresh, q3, q4,
|
||||
q5, q6, q7, q8, q9, q10,
|
||||
&q5, &q6, &q7, &q8);
|
||||
|
||||
q4ResultL.val[0] = vget_low_u8(q5); // d10
|
||||
q4ResultL.val[1] = vget_low_u8(q6); // d12
|
||||
q4ResultL.val[2] = vget_low_u8(q7); // d14
|
||||
q4ResultL.val[3] = vget_low_u8(q8); // d16
|
||||
q4ResultH.val[0] = vget_high_u8(q5); // d11
|
||||
q4ResultH.val[1] = vget_high_u8(q6); // d13
|
||||
q4ResultH.val[2] = vget_high_u8(q7); // d15
|
||||
q4ResultH.val[3] = vget_high_u8(q8); // d17
|
||||
|
||||
d = src - 2;
|
||||
write_4x8(d, pitch, q4ResultL);
|
||||
d += pitch * 8;
|
||||
write_4x8(d, pitch, q4ResultH);
|
||||
}
|
||||
|
||||
void vp8_loop_filter_vertical_edge_uv_neon(
|
||||
unsigned char *u,
|
||||
int pitch,
|
||||
unsigned char blimit,
|
||||
unsigned char limit,
|
||||
unsigned char thresh,
|
||||
unsigned char *v) {
|
||||
unsigned char *us, *ud;
|
||||
unsigned char *vs, *vd;
|
||||
uint8x16_t qblimit, qlimit, qthresh, q3, q4;
|
||||
uint8x16_t q5, q6, q7, q8, q9, q10;
|
||||
uint8x8_t d6, d7, d8, d9, d10, d11, d12, d13, d14;
|
||||
uint8x8_t d15, d16, d17, d18, d19, d20, d21;
|
||||
uint32x4x2_t q2tmp0, q2tmp1, q2tmp2, q2tmp3;
|
||||
uint16x8x2_t q2tmp4, q2tmp5, q2tmp6, q2tmp7;
|
||||
uint8x16x2_t q2tmp8, q2tmp9, q2tmp10, q2tmp11;
|
||||
uint8x8x4_t q4ResultH, q4ResultL;
|
||||
|
||||
qblimit = vdupq_n_u8(blimit);
|
||||
qlimit = vdupq_n_u8(limit);
|
||||
qthresh = vdupq_n_u8(thresh);
|
||||
|
||||
us = u - 4;
|
||||
d6 = vld1_u8(us);
|
||||
us += pitch;
|
||||
d8 = vld1_u8(us);
|
||||
us += pitch;
|
||||
d10 = vld1_u8(us);
|
||||
us += pitch;
|
||||
d12 = vld1_u8(us);
|
||||
us += pitch;
|
||||
d14 = vld1_u8(us);
|
||||
us += pitch;
|
||||
d16 = vld1_u8(us);
|
||||
us += pitch;
|
||||
d18 = vld1_u8(us);
|
||||
us += pitch;
|
||||
d20 = vld1_u8(us);
|
||||
|
||||
vs = v - 4;
|
||||
d7 = vld1_u8(vs);
|
||||
vs += pitch;
|
||||
d9 = vld1_u8(vs);
|
||||
vs += pitch;
|
||||
d11 = vld1_u8(vs);
|
||||
vs += pitch;
|
||||
d13 = vld1_u8(vs);
|
||||
vs += pitch;
|
||||
d15 = vld1_u8(vs);
|
||||
vs += pitch;
|
||||
d17 = vld1_u8(vs);
|
||||
vs += pitch;
|
||||
d19 = vld1_u8(vs);
|
||||
vs += pitch;
|
||||
d21 = vld1_u8(vs);
|
||||
|
||||
q3 = vcombine_u8(d6, d7);
|
||||
q4 = vcombine_u8(d8, d9);
|
||||
q5 = vcombine_u8(d10, d11);
|
||||
q6 = vcombine_u8(d12, d13);
|
||||
q7 = vcombine_u8(d14, d15);
|
||||
q8 = vcombine_u8(d16, d17);
|
||||
q9 = vcombine_u8(d18, d19);
|
||||
q10 = vcombine_u8(d20, d21);
|
||||
|
||||
q2tmp0 = vtrnq_u32(vreinterpretq_u32_u8(q3), vreinterpretq_u32_u8(q7));
|
||||
q2tmp1 = vtrnq_u32(vreinterpretq_u32_u8(q4), vreinterpretq_u32_u8(q8));
|
||||
q2tmp2 = vtrnq_u32(vreinterpretq_u32_u8(q5), vreinterpretq_u32_u8(q9));
|
||||
q2tmp3 = vtrnq_u32(vreinterpretq_u32_u8(q6), vreinterpretq_u32_u8(q10));
|
||||
|
||||
q2tmp4 = vtrnq_u16(vreinterpretq_u16_u32(q2tmp0.val[0]),
|
||||
vreinterpretq_u16_u32(q2tmp2.val[0]));
|
||||
q2tmp5 = vtrnq_u16(vreinterpretq_u16_u32(q2tmp1.val[0]),
|
||||
vreinterpretq_u16_u32(q2tmp3.val[0]));
|
||||
q2tmp6 = vtrnq_u16(vreinterpretq_u16_u32(q2tmp0.val[1]),
|
||||
vreinterpretq_u16_u32(q2tmp2.val[1]));
|
||||
q2tmp7 = vtrnq_u16(vreinterpretq_u16_u32(q2tmp1.val[1]),
|
||||
vreinterpretq_u16_u32(q2tmp3.val[1]));
|
||||
|
||||
q2tmp8 = vtrnq_u8(vreinterpretq_u8_u16(q2tmp4.val[0]),
|
||||
vreinterpretq_u8_u16(q2tmp5.val[0]));
|
||||
q2tmp9 = vtrnq_u8(vreinterpretq_u8_u16(q2tmp4.val[1]),
|
||||
vreinterpretq_u8_u16(q2tmp5.val[1]));
|
||||
q2tmp10 = vtrnq_u8(vreinterpretq_u8_u16(q2tmp6.val[0]),
|
||||
vreinterpretq_u8_u16(q2tmp7.val[0]));
|
||||
q2tmp11 = vtrnq_u8(vreinterpretq_u8_u16(q2tmp6.val[1]),
|
||||
vreinterpretq_u8_u16(q2tmp7.val[1]));
|
||||
|
||||
q3 = q2tmp8.val[0];
|
||||
q4 = q2tmp8.val[1];
|
||||
q5 = q2tmp9.val[0];
|
||||
q6 = q2tmp9.val[1];
|
||||
q7 = q2tmp10.val[0];
|
||||
q8 = q2tmp10.val[1];
|
||||
q9 = q2tmp11.val[0];
|
||||
q10 = q2tmp11.val[1];
|
||||
|
||||
vp8_loop_filter_neon(qblimit, qlimit, qthresh, q3, q4,
|
||||
q5, q6, q7, q8, q9, q10,
|
||||
&q5, &q6, &q7, &q8);
|
||||
|
||||
q4ResultL.val[0] = vget_low_u8(q5); // d10
|
||||
q4ResultL.val[1] = vget_low_u8(q6); // d12
|
||||
q4ResultL.val[2] = vget_low_u8(q7); // d14
|
||||
q4ResultL.val[3] = vget_low_u8(q8); // d16
|
||||
ud = u - 2;
|
||||
write_4x8(ud, pitch, q4ResultL);
|
||||
|
||||
q4ResultH.val[0] = vget_high_u8(q5); // d11
|
||||
q4ResultH.val[1] = vget_high_u8(q6); // d13
|
||||
q4ResultH.val[2] = vget_high_u8(q7); // d15
|
||||
q4ResultH.val[3] = vget_high_u8(q8); // d17
|
||||
vd = v - 2;
|
||||
write_4x8(vd, pitch, q4ResultH);
|
||||
}
|
||||
@@ -1,22 +0,0 @@
/*
 *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */


#include "blockd.h"
#include "vpx_mem/vpx_mem.h"

const unsigned char vp8_block2left[25] =
{
    0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8
};
const unsigned char vp8_block2above[25] =
{
    0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 4, 5, 4, 5, 6, 7, 6, 7, 8
};
@@ -1,312 +0,0 @@
|
||||
/*
|
||||
* Copyright (c) 2010 The WebM project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
|
||||
#ifndef VP8_COMMON_BLOCKD_H_
|
||||
#define VP8_COMMON_BLOCKD_H_
|
||||
|
||||
void vpx_log(const char *format, ...);
|
||||
|
||||
#include "vpx_config.h"
|
||||
#include "vpx_scale/yv12config.h"
|
||||
#include "mv.h"
|
||||
#include "treecoder.h"
|
||||
#include "vpx_ports/mem.h"
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
/*#define DCPRED 1*/
|
||||
#define DCPREDSIMTHRESH 0
|
||||
#define DCPREDCNTTHRESH 3
|
||||
|
||||
#define MB_FEATURE_TREE_PROBS 3
|
||||
#define MAX_MB_SEGMENTS 4
|
||||
|
||||
#define MAX_REF_LF_DELTAS 4
|
||||
#define MAX_MODE_LF_DELTAS 4
|
||||
|
||||
/* Segment Feature Masks */
|
||||
#define SEGMENT_DELTADATA 0
|
||||
#define SEGMENT_ABSDATA 1
|
||||
|
||||
typedef struct
|
||||
{
|
||||
int r, c;
|
||||
} POS;
|
||||
|
||||
#define PLANE_TYPE_Y_NO_DC 0
|
||||
#define PLANE_TYPE_Y2 1
|
||||
#define PLANE_TYPE_UV 2
|
||||
#define PLANE_TYPE_Y_WITH_DC 3
|
||||
|
||||
|
||||
typedef char ENTROPY_CONTEXT;
|
||||
typedef struct
|
||||
{
|
||||
ENTROPY_CONTEXT y1[4];
|
||||
ENTROPY_CONTEXT u[2];
|
||||
ENTROPY_CONTEXT v[2];
|
||||
ENTROPY_CONTEXT y2;
|
||||
} ENTROPY_CONTEXT_PLANES;
|
||||
|
||||
extern const unsigned char vp8_block2left[25];
|
||||
extern const unsigned char vp8_block2above[25];
|
||||
|
||||
#define VP8_COMBINEENTROPYCONTEXTS( Dest, A, B) \
|
||||
Dest = (A)+(B);
|
||||
|
||||
|
||||
typedef enum
|
||||
{
|
||||
KEY_FRAME = 0,
|
||||
INTER_FRAME = 1
|
||||
} FRAME_TYPE;
|
||||
|
||||
typedef enum
|
||||
{
|
||||
DC_PRED, /* average of above and left pixels */
|
||||
V_PRED, /* vertical prediction */
|
||||
H_PRED, /* horizontal prediction */
|
||||
TM_PRED, /* Truemotion prediction */
|
||||
B_PRED, /* block based prediction, each block has its own prediction mode */
|
||||
|
||||
NEARESTMV,
|
||||
NEARMV,
|
||||
ZEROMV,
|
||||
NEWMV,
|
||||
SPLITMV,
|
||||
|
||||
MB_MODE_COUNT
|
||||
} MB_PREDICTION_MODE;
|
||||
|
||||
/* Macroblock level features */
|
||||
typedef enum
|
||||
{
|
||||
MB_LVL_ALT_Q = 0, /* Use alternate Quantizer .... */
|
||||
MB_LVL_ALT_LF = 1, /* Use alternate loop filter value... */
|
||||
MB_LVL_MAX = 2 /* Number of MB level features supported */
|
||||
|
||||
} MB_LVL_FEATURES;
|
||||
|
||||
/* Segment Feature Masks */
|
||||
#define SEGMENT_ALTQ 0x01
|
||||
#define SEGMENT_ALT_LF 0x02
|
||||
|
||||
#define VP8_YMODES (B_PRED + 1)
|
||||
#define VP8_UV_MODES (TM_PRED + 1)
|
||||
|
||||
#define VP8_MVREFS (1 + SPLITMV - NEARESTMV)
|
||||
|
||||
typedef enum
|
||||
{
|
||||
B_DC_PRED, /* average of above and left pixels */
|
||||
B_TM_PRED,
|
||||
|
||||
B_VE_PRED, /* vertical prediction */
|
||||
B_HE_PRED, /* horizontal prediction */
|
||||
|
||||
B_LD_PRED,
|
||||
B_RD_PRED,
|
||||
|
||||
B_VR_PRED,
|
||||
B_VL_PRED,
|
||||
B_HD_PRED,
|
||||
B_HU_PRED,
|
||||
|
||||
LEFT4X4,
|
||||
ABOVE4X4,
|
||||
ZERO4X4,
|
||||
NEW4X4,
|
||||
|
||||
B_MODE_COUNT
|
||||
} B_PREDICTION_MODE;
|
||||
|
||||
#define VP8_BINTRAMODES (B_HU_PRED + 1) /* 10 */
|
||||
#define VP8_SUBMVREFS (1 + NEW4X4 - LEFT4X4)
|
||||
|
||||
/* For keyframes, intra block modes are predicted by the (already decoded)
|
||||
modes for the Y blocks to the left and above us; for interframes, there
|
||||
is a single probability table. */
|
||||
|
||||
union b_mode_info
|
||||
{
|
||||
B_PREDICTION_MODE as_mode;
|
||||
int_mv mv;
|
||||
};
|
||||
|
||||
typedef enum
|
||||
{
|
||||
INTRA_FRAME = 0,
|
||||
LAST_FRAME = 1,
|
||||
GOLDEN_FRAME = 2,
|
||||
ALTREF_FRAME = 3,
|
||||
MAX_REF_FRAMES = 4
|
||||
} MV_REFERENCE_FRAME;
|
||||
|
||||
typedef struct
|
||||
{
|
||||
uint8_t mode, uv_mode;
|
||||
uint8_t ref_frame;
|
||||
uint8_t is_4x4;
|
||||
int_mv mv;
|
||||
|
||||
uint8_t partitioning;
|
||||
uint8_t mb_skip_coeff; /* does this mb has coefficients at all, 1=no coefficients, 0=need decode tokens */
|
||||
uint8_t need_to_clamp_mvs;
|
||||
uint8_t segment_id; /* Which set of segmentation parameters should be used for this MB */
|
||||
} MB_MODE_INFO;
|
||||
|
||||
typedef struct modeinfo
|
||||
{
|
||||
MB_MODE_INFO mbmi;
|
||||
union b_mode_info bmi[16];
|
||||
} MODE_INFO;
|
||||
|
||||
#if CONFIG_MULTI_RES_ENCODING
|
||||
/* The mb-level information needed to be stored for higher-resolution encoder */
|
||||
typedef struct
|
||||
{
|
||||
MB_PREDICTION_MODE mode;
|
||||
MV_REFERENCE_FRAME ref_frame;
|
||||
int_mv mv;
|
||||
int dissim; /* dissimilarity level of the macroblock */
|
||||
} LOWER_RES_MB_INFO;
|
||||
|
||||
/* The frame-level information needed to be stored for higher-resolution
|
||||
* encoder */
|
||||
typedef struct
|
||||
{
|
||||
FRAME_TYPE frame_type;
|
||||
int is_frame_dropped;
|
||||
// The frame rate for the lowest resolution.
|
||||
double low_res_framerate;
|
||||
/* The frame number of each reference frames */
|
||||
unsigned int low_res_ref_frames[MAX_REF_FRAMES];
|
||||
// The video frame counter value for the key frame, for lowest resolution.
|
||||
unsigned int key_frame_counter_value;
|
||||
LOWER_RES_MB_INFO *mb_info;
|
||||
} LOWER_RES_FRAME_INFO;
|
||||
#endif
|
||||
|
||||
typedef struct blockd
|
||||
{
|
||||
short *qcoeff;
|
||||
short *dqcoeff;
|
||||
unsigned char *predictor;
|
||||
short *dequant;
|
||||
|
||||
int offset;
|
||||
char *eob;
|
||||
|
||||
union b_mode_info bmi;
|
||||
} BLOCKD;
|
||||
|
||||
typedef void (*vp8_subpix_fn_t)(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch);
|
||||
|
||||
typedef struct macroblockd
|
||||
{
|
||||
DECLARE_ALIGNED(16, unsigned char, predictor[384]);
|
||||
DECLARE_ALIGNED(16, short, qcoeff[400]);
|
||||
DECLARE_ALIGNED(16, short, dqcoeff[400]);
|
||||
DECLARE_ALIGNED(16, char, eobs[25]);
|
||||
|
||||
DECLARE_ALIGNED(16, short, dequant_y1[16]);
|
||||
DECLARE_ALIGNED(16, short, dequant_y1_dc[16]);
|
||||
DECLARE_ALIGNED(16, short, dequant_y2[16]);
|
||||
DECLARE_ALIGNED(16, short, dequant_uv[16]);
|
||||
|
||||
/* 16 Y blocks, 4 U, 4 V, 1 DC 2nd order block, each with 16 entries. */
|
||||
BLOCKD block[25];
|
||||
int fullpixel_mask;
|
||||
|
||||
YV12_BUFFER_CONFIG pre; /* Filtered copy of previous frame reconstruction */
|
||||
YV12_BUFFER_CONFIG dst;
|
||||
|
||||
MODE_INFO *mode_info_context;
|
||||
int mode_info_stride;
|
||||
|
||||
FRAME_TYPE frame_type;
|
||||
|
||||
int up_available;
|
||||
int left_available;
|
||||
|
||||
unsigned char *recon_above[3];
|
||||
unsigned char *recon_left[3];
|
||||
int recon_left_stride[2];
|
||||
|
||||
/* Y,U,V,Y2 */
|
||||
ENTROPY_CONTEXT_PLANES *above_context;
|
||||
ENTROPY_CONTEXT_PLANES *left_context;
|
||||
|
||||
/* 0 indicates segmentation at MB level is not enabled. Otherwise the individual bits indicate which features are active. */
|
||||
unsigned char segmentation_enabled;
|
||||
|
||||
/* 0 (do not update) 1 (update) the macroblock segmentation map. */
|
||||
unsigned char update_mb_segmentation_map;
|
||||
|
||||
/* 0 (do not update) 1 (update) the macroblock segmentation feature data. */
|
||||
unsigned char update_mb_segmentation_data;
|
||||
|
||||
/* 0 (do not update) 1 (update) the macroblock segmentation feature data. */
|
||||
unsigned char mb_segement_abs_delta;
|
||||
|
||||
/* Per frame flags that define which MB level features (such as quantizer or loop filter level) */
|
||||
/* are enabled and when enabled the proabilities used to decode the per MB flags in MB_MODE_INFO */
|
||||
vp8_prob mb_segment_tree_probs[MB_FEATURE_TREE_PROBS]; /* Probability Tree used to code Segment number */
|
||||
|
||||
signed char segment_feature_data[MB_LVL_MAX][MAX_MB_SEGMENTS]; /* Segment parameters */
|
||||
|
||||
/* mode_based Loop filter adjustment */
|
||||
unsigned char mode_ref_lf_delta_enabled;
|
||||
unsigned char mode_ref_lf_delta_update;
|
||||
|
||||
/* Delta values have the range +/- MAX_LOOP_FILTER */
|
||||
signed char last_ref_lf_deltas[MAX_REF_LF_DELTAS]; /* 0 = Intra, Last, GF, ARF */
|
||||
signed char ref_lf_deltas[MAX_REF_LF_DELTAS]; /* 0 = Intra, Last, GF, ARF */
|
||||
signed char last_mode_lf_deltas[MAX_MODE_LF_DELTAS]; /* 0 = BPRED, ZERO_MV, MV, SPLIT */
|
||||
signed char mode_lf_deltas[MAX_MODE_LF_DELTAS]; /* 0 = BPRED, ZERO_MV, MV, SPLIT */
|
||||
|
||||
/* Distance of MB away from frame edges */
|
||||
int mb_to_left_edge;
|
||||
int mb_to_right_edge;
|
||||
int mb_to_top_edge;
|
||||
int mb_to_bottom_edge;
|
||||
|
||||
|
||||
|
||||
vp8_subpix_fn_t subpixel_predict;
|
||||
vp8_subpix_fn_t subpixel_predict8x4;
|
||||
vp8_subpix_fn_t subpixel_predict8x8;
|
||||
vp8_subpix_fn_t subpixel_predict16x16;
|
||||
|
||||
void *current_bc;
|
||||
|
||||
int corrupted;
|
||||
|
||||
#if ARCH_X86 || ARCH_X86_64
|
||||
/* This is an intermediate buffer currently used in sub-pixel motion search
|
||||
* to keep a copy of the reference area. This buffer can be used for other
|
||||
* purpose.
|
||||
*/
|
||||
DECLARE_ALIGNED(32, unsigned char, y_buf[22*32]);
|
||||
#endif
|
||||
} MACROBLOCKD;
|
||||
|
||||
|
||||
extern void vp8_build_block_doffsets(MACROBLOCKD *x);
|
||||
extern void vp8_setup_block_dptrs(MACROBLOCKD *x);
|
||||
|
||||
#ifdef __cplusplus
|
||||
} // extern "C"
|
||||
#endif
|
||||
|
||||
#endif // VP8_COMMON_BLOCKD_H_
|
||||
@@ -1,197 +0,0 @@
|
||||
/*
|
||||
* Copyright (c) 2010 The WebM project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#ifndef VP8_COMMON_COEFUPDATEPROBS_H_
|
||||
#define VP8_COMMON_COEFUPDATEPROBS_H_
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
/* Update probabilities for the nodes in the token entropy tree.
|
||||
Generated file included by entropy.c */
|
||||
|
||||
const vp8_prob vp8_coef_update_probs [BLOCK_TYPES] [COEF_BANDS] [PREV_COEF_CONTEXTS] [ENTROPY_NODES] =
|
||||
{
|
||||
{
|
||||
{
|
||||
{255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
|
||||
{255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
|
||||
{255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
|
||||
},
|
||||
{
|
||||
{176, 246, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
|
||||
{223, 241, 252, 255, 255, 255, 255, 255, 255, 255, 255, },
|
||||
{249, 253, 253, 255, 255, 255, 255, 255, 255, 255, 255, },
|
||||
},
|
||||
{
|
||||
{255, 244, 252, 255, 255, 255, 255, 255, 255, 255, 255, },
|
||||
{234, 254, 254, 255, 255, 255, 255, 255, 255, 255, 255, },
|
||||
{253, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
|
||||
},
|
||||
{
|
||||
{255, 246, 254, 255, 255, 255, 255, 255, 255, 255, 255, },
|
||||
{239, 253, 254, 255, 255, 255, 255, 255, 255, 255, 255, },
|
||||
{254, 255, 254, 255, 255, 255, 255, 255, 255, 255, 255, },
|
||||
},
|
||||
{
|
||||
{255, 248, 254, 255, 255, 255, 255, 255, 255, 255, 255, },
|
||||
{251, 255, 254, 255, 255, 255, 255, 255, 255, 255, 255, },
|
||||
{255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
|
||||
},
|
||||
{
|
||||
{255, 253, 254, 255, 255, 255, 255, 255, 255, 255, 255, },
|
||||
{251, 254, 254, 255, 255, 255, 255, 255, 255, 255, 255, },
|
||||
{254, 255, 254, 255, 255, 255, 255, 255, 255, 255, 255, },
|
||||
},
|
||||
{
|
||||
{255, 254, 253, 255, 254, 255, 255, 255, 255, 255, 255, },
|
||||
{250, 255, 254, 255, 254, 255, 255, 255, 255, 255, 255, },
|
||||
{254, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
|
||||
},
|
||||
{
|
||||
{255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
|
||||
{255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
|
||||
{255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
|
||||
},
|
||||
},
|
||||
{
|
||||
{
|
||||
{217, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
|
||||
{225, 252, 241, 253, 255, 255, 254, 255, 255, 255, 255, },
|
||||
{234, 250, 241, 250, 253, 255, 253, 254, 255, 255, 255, },
|
||||
},
|
||||
{
|
||||
{255, 254, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
|
||||
{223, 254, 254, 255, 255, 255, 255, 255, 255, 255, 255, },
|
||||
{238, 253, 254, 254, 255, 255, 255, 255, 255, 255, 255, },
|
||||
},
|
||||
{
|
||||
{255, 248, 254, 255, 255, 255, 255, 255, 255, 255, 255, },
|
||||
{249, 254, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
|
||||
{255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
|
||||
},
|
||||
{
|
||||
{255, 253, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
|
||||
{247, 254, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
|
||||
{255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
|
||||
},
|
||||
{
|
||||
{255, 253, 254, 255, 255, 255, 255, 255, 255, 255, 255, },
|
||||
{252, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
|
||||
{255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
|
||||
},
|
||||
{
|
||||
{255, 254, 254, 255, 255, 255, 255, 255, 255, 255, 255, },
|
||||
{253, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
|
||||
{255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
|
||||
},
|
||||
{
|
||||
{255, 254, 253, 255, 255, 255, 255, 255, 255, 255, 255, },
|
||||
{250, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
|
||||
{254, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
|
||||
},
|
||||
{
|
||||
{255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
|
||||
{255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
|
||||
{255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
|
||||
},
|
||||
},
|
||||
{
|
||||
{
|
||||
{186, 251, 250, 255, 255, 255, 255, 255, 255, 255, 255, },
|
||||
{234, 251, 244, 254, 255, 255, 255, 255, 255, 255, 255, },
|
||||
{251, 251, 243, 253, 254, 255, 254, 255, 255, 255, 255, },
|
||||
},
|
||||
{
|
||||
{255, 253, 254, 255, 255, 255, 255, 255, 255, 255, 255, },
|
||||
{236, 253, 254, 255, 255, 255, 255, 255, 255, 255, 255, },
|
||||
{251, 253, 253, 254, 254, 255, 255, 255, 255, 255, 255, },
|
||||
},
|
||||
{
|
||||
{255, 254, 254, 255, 255, 255, 255, 255, 255, 255, 255, },
|
||||
{254, 254, 254, 255, 255, 255, 255, 255, 255, 255, 255, },
|
||||
{255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
|
||||
},
|
||||
{
|
||||
{255, 254, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
|
||||
{254, 254, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
|
||||
{254, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
|
||||
},
|
||||
{
|
||||
{255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
|
||||
{254, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
|
||||
{255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
|
||||
},
|
||||
{
|
||||
{255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
|
||||
{255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
|
||||
{255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
|
||||
},
|
||||
{
|
||||
{255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
|
||||
{255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
|
||||
{255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
|
||||
},
|
||||
{
|
||||
{255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
|
||||
{255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
|
||||
{255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
|
||||
},
|
||||
},
|
||||
{
|
||||
{
|
||||
{248, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
|
||||
{250, 254, 252, 254, 255, 255, 255, 255, 255, 255, 255, },
|
||||
{248, 254, 249, 253, 255, 255, 255, 255, 255, 255, 255, },
|
||||
},
|
||||
{
|
||||
{255, 253, 253, 255, 255, 255, 255, 255, 255, 255, 255, },
|
||||
{246, 253, 253, 255, 255, 255, 255, 255, 255, 255, 255, },
|
||||
{252, 254, 251, 254, 254, 255, 255, 255, 255, 255, 255, },
|
||||
},
|
||||
{
|
||||
{255, 254, 252, 255, 255, 255, 255, 255, 255, 255, 255, },
|
||||
{248, 254, 253, 255, 255, 255, 255, 255, 255, 255, 255, },
|
||||
{253, 255, 254, 254, 255, 255, 255, 255, 255, 255, 255, },
|
||||
},
|
||||
{
|
||||
{255, 251, 254, 255, 255, 255, 255, 255, 255, 255, 255, },
|
||||
{245, 251, 254, 255, 255, 255, 255, 255, 255, 255, 255, },
|
||||
{253, 253, 254, 255, 255, 255, 255, 255, 255, 255, 255, },
|
||||
},
|
||||
{
|
||||
{255, 251, 253, 255, 255, 255, 255, 255, 255, 255, 255, },
|
||||
{252, 253, 254, 255, 255, 255, 255, 255, 255, 255, 255, },
|
||||
{255, 254, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
|
||||
},
|
||||
{
|
||||
{255, 252, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
|
||||
{249, 255, 254, 255, 255, 255, 255, 255, 255, 255, 255, },
|
||||
{255, 255, 254, 255, 255, 255, 255, 255, 255, 255, 255, },
|
||||
},
|
||||
{
|
||||
{255, 255, 253, 255, 255, 255, 255, 255, 255, 255, 255, },
|
||||
{250, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
|
||||
{255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
|
||||
},
|
||||
{
|
||||
{255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
|
||||
{254, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
|
||||
{255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
|
||||
},
|
||||
},
|
||||
};
|
||||
|
||||
#ifdef __cplusplus
|
||||
} // extern "C"
|
||||
#endif
|
||||
|
||||
#endif // VP8_COMMON_COEFUPDATEPROBS_H_
|
||||
@@ -1,48 +0,0 @@
/*
 * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
 *
 * Use of this source code is governed by a BSD-style license
 * that can be found in the LICENSE file in the root of the source
 * tree. An additional intellectual property rights grant can be found
 * in the file PATENTS. All contributing project authors may
 * be found in the AUTHORS file in the root of the source tree.
 */


#ifndef VP8_COMMON_COMMON_H_
#define VP8_COMMON_COMMON_H_

#include <assert.h>

/* Interface header for common constant data structures and lookup tables */

#include "vpx_mem/vpx_mem.h"

#ifdef __cplusplus
extern "C" {
#endif

/* Only need this for fixed-size arrays, for structs just assign. */

#define vp8_copy( Dest, Src) { \
        assert( sizeof( Dest) == sizeof( Src)); \
        memcpy( Dest, Src, sizeof( Src)); \
    }

/* Use this for variably-sized arrays. */

#define vp8_copy_array( Dest, Src, N) { \
        assert( sizeof( *(Dest)) == sizeof( *(Src))); \
        memcpy( Dest, Src, N * sizeof( *(Src))); \
    }

#define vp8_zero( Dest) memset( &(Dest), 0, sizeof( Dest));

#define vp8_zero_array( Dest, N) memset( Dest, 0, N * sizeof( *(Dest)));


#ifdef __cplusplus
} // extern "C"
#endif

#endif // VP8_COMMON_COMMON_H_
@@ -1,399 +0,0 @@
|
||||
/*
|
||||
* Copyright (c) 2010 The WebM project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
|
||||
#include "entropy.h"
|
||||
|
||||
/* *** GENERATED FILE: DO NOT EDIT *** */
|
||||
|
||||
#if 0
|
||||
int Contexts[vp8_coef_counter_dimen];
|
||||
|
||||
const int default_contexts[vp8_coef_counter_dimen] =
|
||||
{
|
||||
{
|
||||
// Block Type ( 0 )
|
||||
{
|
||||
// Coeff Band ( 0 )
|
||||
{ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,},
|
||||
{ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,},
|
||||
{ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,},
|
||||
},
|
||||
{
|
||||
// Coeff Band ( 1 )
|
||||
{30190, 26544, 225, 24, 4, 0, 0, 0, 0, 0, 0, 4171593,},
|
||||
{26846, 25157, 1241, 130, 26, 6, 1, 0, 0, 0, 0, 149987,},
|
||||
{10484, 9538, 1006, 160, 36, 18, 0, 0, 0, 0, 0, 15104,},
|
||||
},
|
||||
{
|
||||
// Coeff Band ( 2 )
|
||||
{25842, 40456, 1126, 83, 11, 2, 0, 0, 0, 0, 0, 0,},
|
||||
{9338, 8010, 512, 73, 7, 3, 2, 0, 0, 0, 0, 43294,},
|
||||
{1047, 751, 149, 31, 13, 6, 1, 0, 0, 0, 0, 879,},
|
||||
},
|
||||
{
|
||||
// Coeff Band ( 3 )
|
||||
{26136, 9826, 252, 13, 0, 0, 0, 0, 0, 0, 0, 0,},
|
||||
{8134, 5574, 191, 14, 2, 0, 0, 0, 0, 0, 0, 35302,},
|
||||
{ 605, 677, 116, 9, 1, 0, 0, 0, 0, 0, 0, 611,},
|
||||
},
|
||||
{
|
||||
// Coeff Band ( 4 )
|
||||
{10263, 15463, 283, 17, 0, 0, 0, 0, 0, 0, 0, 0,},
|
||||
{2773, 2191, 128, 9, 2, 2, 0, 0, 0, 0, 0, 10073,},
|
||||
{ 134, 125, 32, 4, 0, 2, 0, 0, 0, 0, 0, 50,},
|
||||
},
|
||||
{
|
||||
// Coeff Band ( 5 )
|
||||
{10483, 2663, 23, 1, 0, 0, 0, 0, 0, 0, 0, 0,},
|
||||
{2137, 1251, 27, 1, 1, 0, 0, 0, 0, 0, 0, 14362,},
|
||||
{ 116, 156, 14, 2, 1, 0, 0, 0, 0, 0, 0, 190,},
|
||||
},
|
||||
{
|
||||
// Coeff Band ( 6 )
|
||||
{40977, 27614, 412, 28, 0, 0, 0, 0, 0, 0, 0, 0,},
|
||||
{6113, 5213, 261, 22, 3, 0, 0, 0, 0, 0, 0, 26164,},
|
||||
{ 382, 312, 50, 14, 2, 0, 0, 0, 0, 0, 0, 345,},
|
||||
},
|
||||
{
|
||||
// Coeff Band ( 7 )
|
||||
{ 0, 26, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,},
|
||||
{ 0, 13, 0, 0, 0, 0, 0, 0, 0, 0, 0, 319,},
|
||||
{ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 8,},
|
||||
},
|
||||
},
|
||||
{
|
||||
// Block Type ( 1 )
|
||||
{
|
||||
// Coeff Band ( 0 )
|
||||
{3268, 19382, 1043, 250, 93, 82, 49, 26, 17, 8, 25, 82289,},
|
||||
{8758, 32110, 5436, 1832, 827, 668, 420, 153, 24, 0, 3, 52914,},
|
||||
{9337, 23725, 8487, 3954, 2107, 1836, 1069, 399, 59, 0, 0, 18620,},
|
||||
},
|
||||
{
|
||||
// Coeff Band ( 1 )
|
||||
{12419, 8420, 452, 62, 9, 1, 0, 0, 0, 0, 0, 0,},
|
||||
{11715, 8705, 693, 92, 15, 7, 2, 0, 0, 0, 0, 53988,},
|
||||
{7603, 8585, 2306, 778, 270, 145, 39, 5, 0, 0, 0, 9136,},
|
||||
},
|
||||
{
|
||||
// Coeff Band ( 2 )
|
||||
{15938, 14335, 1207, 184, 55, 13, 4, 1, 0, 0, 0, 0,},
|
||||
{7415, 6829, 1138, 244, 71, 26, 7, 0, 0, 0, 0, 9980,},
|
||||
{1580, 1824, 655, 241, 89, 46, 10, 2, 0, 0, 0, 429,},
|
||||
},
|
||||
{
|
||||
// Coeff Band ( 3 )
|
||||
{19453, 5260, 201, 19, 0, 0, 0, 0, 0, 0, 0, 0,},
|
||||
{9173, 3758, 213, 22, 1, 1, 0, 0, 0, 0, 0, 9820,},
|
||||
{1689, 1277, 276, 51, 17, 4, 0, 0, 0, 0, 0, 679,},
|
||||
},
|
||||
{
|
||||
// Coeff Band ( 4 )
|
||||
{12076, 10667, 620, 85, 19, 9, 5, 0, 0, 0, 0, 0,},
|
||||
{4665, 3625, 423, 55, 19, 9, 0, 0, 0, 0, 0, 5127,},
|
||||
{ 415, 440, 143, 34, 20, 7, 2, 0, 0, 0, 0, 101,},
|
||||
},
|
||||
{
|
||||
// Coeff Band ( 5 )
|
||||
{12183, 4846, 115, 11, 1, 0, 0, 0, 0, 0, 0, 0,},
|
||||
{4226, 3149, 177, 21, 2, 0, 0, 0, 0, 0, 0, 7157,},
|
||||
{ 375, 621, 189, 51, 11, 4, 1, 0, 0, 0, 0, 198,},
|
||||
},
|
||||
{
|
||||
// Coeff Band ( 6 )
|
||||
{61658, 37743, 1203, 94, 10, 3, 0, 0, 0, 0, 0, 0,},
|
||||
{15514, 11563, 903, 111, 14, 5, 0, 0, 0, 0, 0, 25195,},
|
||||
{ 929, 1077, 291, 78, 14, 7, 1, 0, 0, 0, 0, 507,},
|
||||
},
|
||||
{
|
||||
// Coeff Band ( 7 )
|
||||
{ 0, 990, 15, 3, 0, 0, 0, 0, 0, 0, 0, 0,},
|
||||
{ 0, 412, 13, 0, 0, 0, 0, 0, 0, 0, 0, 1641,},
|
||||
{ 0, 18, 7, 1, 0, 0, 0, 0, 0, 0, 0, 30,},
|
||||
},
|
||||
},
|
||||
{
|
||||
// Block Type ( 2 )
|
||||
{
|
||||
// Coeff Band ( 0 )
|
||||
{ 953, 24519, 628, 120, 28, 12, 4, 0, 0, 0, 0, 2248798,},
|
||||
{1525, 25654, 2647, 617, 239, 143, 42, 5, 0, 0, 0, 66837,},
|
||||
{1180, 11011, 3001, 1237, 532, 448, 239, 54, 5, 0, 0, 7122,},
|
||||
},
|
||||
{
|
||||
// Coeff Band ( 1 )
|
||||
{1356, 2220, 67, 10, 4, 1, 0, 0, 0, 0, 0, 0,},
|
||||
{1450, 2544, 102, 18, 4, 3, 0, 0, 0, 0, 0, 57063,},
|
||||
{1182, 2110, 470, 130, 41, 21, 0, 0, 0, 0, 0, 6047,},
|
||||
},
|
||||
{
|
||||
// Coeff Band ( 2 )
|
||||
{ 370, 3378, 200, 30, 5, 4, 1, 0, 0, 0, 0, 0,},
|
||||
{ 293, 1006, 131, 29, 11, 0, 0, 0, 0, 0, 0, 5404,},
|
||||
{ 114, 387, 98, 23, 4, 8, 1, 0, 0, 0, 0, 236,},
|
||||
},
|
||||
{
|
||||
// Coeff Band ( 3 )
|
||||
{ 579, 194, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0,},
|
||||
{ 395, 213, 5, 1, 0, 0, 0, 0, 0, 0, 0, 4157,},
|
||||
{ 119, 122, 4, 0, 0, 0, 0, 0, 0, 0, 0, 300,},
|
||||
},
|
||||
{
|
||||
// Coeff Band ( 4 )
|
||||
{ 38, 557, 19, 0, 0, 0, 0, 0, 0, 0, 0, 0,},
|
||||
{ 21, 114, 12, 1, 0, 0, 0, 0, 0, 0, 0, 427,},
|
||||
{ 0, 5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 7,},
|
||||
},
|
||||
{
|
||||
// Coeff Band ( 5 )
|
||||
{ 52, 7, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,},
|
||||
{ 18, 6, 0, 0, 0, 0, 0, 0, 0, 0, 0, 652,},
|
||||
{ 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 30,},
|
||||
},
|
||||
{
|
||||
// Coeff Band ( 6 )
|
||||
{ 640, 569, 10, 0, 0, 0, 0, 0, 0, 0, 0, 0,},
|
||||
{ 25, 77, 2, 0, 0, 0, 0, 0, 0, 0, 0, 517,},
|
||||
{ 4, 7, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3,},
|
||||
},
|
||||
{
|
||||
// Coeff Band ( 7 )
|
||||
{ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,},
|
||||
{ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,},
|
||||
{ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,},
|
||||
},
|
||||
},
|
||||
{
|
||||
// Block Type ( 3 )
|
||||
{
|
||||
// Coeff Band ( 0 )
|
||||
{2506, 20161, 2707, 767, 261, 178, 107, 30, 14, 3, 0, 100694,},
|
||||
{8806, 36478, 8817, 3268, 1280, 850, 401, 114, 42, 0, 0, 58572,},
|
||||
{11003, 27214, 11798, 5716, 2482, 2072, 1048, 175, 32, 0, 0, 19284,},
|
||||
},
|
||||
{
|
||||
// Coeff Band ( 1 )
|
||||
{9738, 11313, 959, 205, 70, 18, 11, 1, 0, 0, 0, 0,},
|
||||
{12628, 15085, 1507, 273, 52, 19, 9, 0, 0, 0, 0, 54280,},
|
||||
{10701, 15846, 5561, 1926, 813, 570, 249, 36, 0, 0, 0, 6460,},
|
||||
},
|
||||
{
|
||||
// Coeff Band ( 2 )
|
||||
{6781, 22539, 2784, 634, 182, 123, 20, 4, 0, 0, 0, 0,},
|
||||
{6263, 11544, 2649, 790, 259, 168, 27, 5, 0, 0, 0, 20539,},
|
||||
{3109, 4075, 2031, 896, 457, 386, 158, 29, 0, 0, 0, 1138,},
|
||||
},
|
||||
{
|
||||
// Coeff Band ( 3 )
|
||||
{11515, 4079, 465, 73, 5, 14, 2, 0, 0, 0, 0, 0,},
|
||||
{9361, 5834, 650, 96, 24, 8, 4, 0, 0, 0, 0, 22181,},
|
||||
{4343, 3974, 1360, 415, 132, 96, 14, 1, 0, 0, 0, 1267,},
|
||||
},
|
||||
{
|
||||
// Coeff Band ( 4 )
|
||||
{4787, 9297, 823, 168, 44, 12, 4, 0, 0, 0, 0, 0,},
|
||||
{3619, 4472, 719, 198, 60, 31, 3, 0, 0, 0, 0, 8401,},
|
||||
{1157, 1175, 483, 182, 88, 31, 8, 0, 0, 0, 0, 268,},
|
||||
},
|
||||
{
|
||||
// Coeff Band ( 5 )
|
||||
{8299, 1226, 32, 5, 1, 0, 0, 0, 0, 0, 0, 0,},
|
||||
{3502, 1568, 57, 4, 1, 1, 0, 0, 0, 0, 0, 9811,},
|
||||
{1055, 1070, 166, 29, 6, 1, 0, 0, 0, 0, 0, 527,},
|
||||
},
|
||||
{
|
||||
// Coeff Band ( 6 )
|
||||
{27414, 27927, 1989, 347, 69, 26, 0, 0, 0, 0, 0, 0,},
|
||||
{5876, 10074, 1574, 341, 91, 24, 4, 0, 0, 0, 0, 21954,},
|
||||
{1571, 2171, 778, 324, 124, 65, 16, 0, 0, 0, 0, 979,},
|
||||
},
|
||||
{
|
||||
// Coeff Band ( 7 )
|
||||
{ 0, 29, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,},
|
||||
{ 0, 23, 0, 0, 0, 0, 0, 0, 0, 0, 0, 459,},
|
||||
{ 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 13,},
|
||||
},
|
||||
},
|
||||
};
|
||||
|
||||
//Update probabilities for the nodes in the token entropy tree.
|
||||
const vp8_prob tree_update_probs[vp8_coef_tree_dimen] =
|
||||
{
|
||||
{
|
||||
{
|
||||
{255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
|
||||
{255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
|
||||
{255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
|
||||
},
|
||||
{
|
||||
{176, 246, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
|
||||
{223, 241, 252, 255, 255, 255, 255, 255, 255, 255, 255, },
|
||||
{249, 253, 253, 255, 255, 255, 255, 255, 255, 255, 255, },
|
||||
},
|
||||
{
|
||||
{255, 244, 252, 255, 255, 255, 255, 255, 255, 255, 255, },
|
||||
{234, 254, 254, 255, 255, 255, 255, 255, 255, 255, 255, },
|
||||
{253, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
|
||||
},
|
||||
{
|
||||
{255, 246, 254, 255, 255, 255, 255, 255, 255, 255, 255, },
|
||||
{239, 253, 254, 255, 255, 255, 255, 255, 255, 255, 255, },
|
||||
{254, 255, 254, 255, 255, 255, 255, 255, 255, 255, 255, },
|
||||
},
|
||||
{
|
||||
{255, 248, 254, 255, 255, 255, 255, 255, 255, 255, 255, },
|
||||
{251, 255, 254, 255, 255, 255, 255, 255, 255, 255, 255, },
|
||||
{255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
|
||||
},
|
||||
{
|
||||
{255, 253, 254, 255, 255, 255, 255, 255, 255, 255, 255, },
|
||||
{251, 254, 254, 255, 255, 255, 255, 255, 255, 255, 255, },
|
||||
{254, 255, 254, 255, 255, 255, 255, 255, 255, 255, 255, },
|
||||
},
|
||||
{
|
||||
{255, 254, 253, 255, 254, 255, 255, 255, 255, 255, 255, },
|
||||
{250, 255, 254, 255, 254, 255, 255, 255, 255, 255, 255, },
|
||||
{254, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
|
||||
},
|
||||
{
|
||||
{255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
|
||||
{255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
|
||||
{255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
|
||||
},
|
||||
},
|
||||
{
|
||||
{
|
||||
{217, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
|
||||
{225, 252, 241, 253, 255, 255, 254, 255, 255, 255, 255, },
|
||||
{234, 250, 241, 250, 253, 255, 253, 254, 255, 255, 255, },
|
||||
},
|
||||
{
|
||||
{255, 254, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
|
||||
{223, 254, 254, 255, 255, 255, 255, 255, 255, 255, 255, },
|
||||
{238, 253, 254, 254, 255, 255, 255, 255, 255, 255, 255, },
|
||||
},
|
||||
{
|
||||
{255, 248, 254, 255, 255, 255, 255, 255, 255, 255, 255, },
|
||||
{249, 254, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
|
||||
{255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
|
||||
},
|
||||
{
|
||||
{255, 253, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
|
||||
{247, 254, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
|
||||
{255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
|
||||
},
|
||||
{
|
||||
{255, 253, 254, 255, 255, 255, 255, 255, 255, 255, 255, },
|
||||
{252, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
|
||||
{255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
|
||||
},
|
||||
{
|
||||
{255, 254, 254, 255, 255, 255, 255, 255, 255, 255, 255, },
|
||||
{253, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
|
||||
{255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
|
||||
},
|
||||
{
|
||||
{255, 254, 253, 255, 255, 255, 255, 255, 255, 255, 255, },
|
||||
{250, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
|
||||
{254, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
|
||||
},
|
||||
{
|
||||
{255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
|
||||
{255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
|
||||
{255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
|
||||
},
|
||||
},
|
||||
{
|
||||
{
|
||||
{186, 251, 250, 255, 255, 255, 255, 255, 255, 255, 255, },
|
||||
{234, 251, 244, 254, 255, 255, 255, 255, 255, 255, 255, },
|
||||
{251, 251, 243, 253, 254, 255, 254, 255, 255, 255, 255, },
|
||||
},
|
||||
{
|
||||
{255, 253, 254, 255, 255, 255, 255, 255, 255, 255, 255, },
|
||||
{236, 253, 254, 255, 255, 255, 255, 255, 255, 255, 255, },
|
||||
{251, 253, 253, 254, 254, 255, 255, 255, 255, 255, 255, },
|
||||
},
|
||||
{
|
||||
{255, 254, 254, 255, 255, 255, 255, 255, 255, 255, 255, },
|
||||
{254, 254, 254, 255, 255, 255, 255, 255, 255, 255, 255, },
|
||||
{255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
|
||||
},
|
||||
{
|
||||
{255, 254, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
|
||||
{254, 254, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
|
||||
{254, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
|
||||
},
|
||||
{
|
||||
{255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
|
||||
{254, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
|
||||
{255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
|
||||
},
|
||||
{
|
||||
{255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
|
||||
{255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
|
||||
{255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
|
||||
},
|
||||
{
|
||||
{255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
|
||||
{255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
|
||||
{255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
|
||||
},
|
||||
{
|
||||
{255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
|
||||
{255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
|
||||
{255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
|
||||
},
|
||||
},
|
||||
{
|
||||
{
|
||||
{248, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
|
||||
{250, 254, 252, 254, 255, 255, 255, 255, 255, 255, 255, },
|
||||
{248, 254, 249, 253, 255, 255, 255, 255, 255, 255, 255, },
|
||||
},
|
||||
{
|
||||
{255, 253, 253, 255, 255, 255, 255, 255, 255, 255, 255, },
|
||||
{246, 253, 253, 255, 255, 255, 255, 255, 255, 255, 255, },
|
||||
{252, 254, 251, 254, 254, 255, 255, 255, 255, 255, 255, },
|
||||
},
|
||||
{
|
||||
{255, 254, 252, 255, 255, 255, 255, 255, 255, 255, 255, },
|
||||
{248, 254, 253, 255, 255, 255, 255, 255, 255, 255, 255, },
|
||||
{253, 255, 254, 254, 255, 255, 255, 255, 255, 255, 255, },
|
||||
},
|
||||
{
|
||||
{255, 251, 254, 255, 255, 255, 255, 255, 255, 255, 255, },
|
||||
{245, 251, 254, 255, 255, 255, 255, 255, 255, 255, 255, },
|
||||
{253, 253, 254, 255, 255, 255, 255, 255, 255, 255, 255, },
|
||||
},
|
||||
{
|
||||
{255, 251, 253, 255, 255, 255, 255, 255, 255, 255, 255, },
|
||||
{252, 253, 254, 255, 255, 255, 255, 255, 255, 255, 255, },
|
||||
{255, 254, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
|
||||
},
|
||||
{
|
||||
{255, 252, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
|
||||
{249, 255, 254, 255, 255, 255, 255, 255, 255, 255, 255, },
|
||||
{255, 255, 254, 255, 255, 255, 255, 255, 255, 255, 255, },
|
||||
},
|
||||
{
|
||||
{255, 255, 253, 255, 255, 255, 255, 255, 255, 255, 255, },
|
||||
{250, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
|
||||
{255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
|
||||
},
|
||||
{
|
||||
{255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
|
||||
{254, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
|
||||
{255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
|
||||
},
|
||||
},
|
||||
};
|
||||
#endif
|
||||
@@ -1,32 +0,0 @@
/*
 * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
 *
 * Use of this source code is governed by a BSD-style license
 * that can be found in the LICENSE file in the root of the source
 * tree. An additional intellectual property rights grant can be found
 * in the file PATENTS. All contributing project authors may
 * be found in the AUTHORS file in the root of the source tree.
 */


#include <string.h>

#include "./vp8_rtcd.h"
#include "vpx/vpx_integer.h"

/* Copy 2 macroblocks to a buffer */
void vp8_copy32xn_c(const unsigned char *src_ptr, int src_stride,
                    unsigned char *dst_ptr, int dst_stride,
                    int height)
{
    int r;

    for (r = 0; r < height; r++)
    {
        memcpy(dst_ptr, src_ptr, 32);

        src_ptr += src_stride;
        dst_ptr += dst_stride;

    }
}
@@ -1,155 +0,0 @@
|
||||
/*
|
||||
* Copyright (c) 2010 The WebM project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
|
||||
#include <stdio.h>
|
||||
#include "blockd.h"
|
||||
|
||||
|
||||
void vp8_print_modes_and_motion_vectors(MODE_INFO *mi, int rows, int cols, int frame)
|
||||
{
|
||||
|
||||
int mb_row;
|
||||
int mb_col;
|
||||
int mb_index = 0;
|
||||
FILE *mvs = fopen("mvs.stt", "a");
|
||||
|
||||
/* print out the macroblock Y modes */
|
||||
mb_index = 0;
|
||||
fprintf(mvs, "Mb Modes for Frame %d\n", frame);
|
||||
|
||||
for (mb_row = 0; mb_row < rows; mb_row++)
|
||||
{
|
||||
for (mb_col = 0; mb_col < cols; mb_col++)
|
||||
{
|
||||
|
||||
fprintf(mvs, "%2d ", mi[mb_index].mbmi.mode);
|
||||
|
||||
mb_index++;
|
||||
}
|
||||
|
||||
fprintf(mvs, "\n");
|
||||
mb_index++;
|
||||
}
|
||||
|
||||
fprintf(mvs, "\n");
|
||||
|
||||
mb_index = 0;
|
||||
fprintf(mvs, "Mb mv ref for Frame %d\n", frame);
|
||||
|
||||
for (mb_row = 0; mb_row < rows; mb_row++)
|
||||
{
|
||||
for (mb_col = 0; mb_col < cols; mb_col++)
|
||||
{
|
||||
|
||||
fprintf(mvs, "%2d ", mi[mb_index].mbmi.ref_frame);
|
||||
|
||||
mb_index++;
|
||||
}
|
||||
|
||||
fprintf(mvs, "\n");
|
||||
mb_index++;
|
||||
}
|
||||
|
||||
fprintf(mvs, "\n");
|
||||
|
||||
/* print out the macroblock UV modes */
|
||||
mb_index = 0;
|
||||
fprintf(mvs, "UV Modes for Frame %d\n", frame);
|
||||
|
||||
for (mb_row = 0; mb_row < rows; mb_row++)
|
||||
{
|
||||
for (mb_col = 0; mb_col < cols; mb_col++)
|
||||
{
|
||||
|
||||
fprintf(mvs, "%2d ", mi[mb_index].mbmi.uv_mode);
|
||||
|
||||
mb_index++;
|
||||
}
|
||||
|
||||
mb_index++;
|
||||
fprintf(mvs, "\n");
|
||||
}
|
||||
|
||||
fprintf(mvs, "\n");
|
||||
|
||||
/* print out the block modes */
|
||||
fprintf(mvs, "Mbs for Frame %d\n", frame);
|
||||
{
|
||||
int b_row;
|
||||
|
||||
for (b_row = 0; b_row < 4 * rows; b_row++)
|
||||
{
|
||||
int b_col;
|
||||
int bindex;
|
||||
|
||||
for (b_col = 0; b_col < 4 * cols; b_col++)
|
||||
{
|
||||
mb_index = (b_row >> 2) * (cols + 1) + (b_col >> 2);
|
||||
bindex = (b_row & 3) * 4 + (b_col & 3);
|
||||
|
||||
if (mi[mb_index].mbmi.mode == B_PRED)
|
||||
fprintf(mvs, "%2d ", mi[mb_index].bmi[bindex].as_mode);
|
||||
else
|
||||
fprintf(mvs, "xx ");
|
||||
|
||||
}
|
||||
|
||||
fprintf(mvs, "\n");
|
||||
}
|
||||
}
|
||||
fprintf(mvs, "\n");
|
||||
|
||||
/* print out the macroblock mvs */
|
||||
mb_index = 0;
|
||||
fprintf(mvs, "MVs for Frame %d\n", frame);
|
||||
|
||||
for (mb_row = 0; mb_row < rows; mb_row++)
|
||||
{
|
||||
for (mb_col = 0; mb_col < cols; mb_col++)
|
||||
{
|
||||
fprintf(mvs, "%5d:%-5d", mi[mb_index].mbmi.mv.as_mv.row / 2, mi[mb_index].mbmi.mv.as_mv.col / 2);
|
||||
|
||||
mb_index++;
|
||||
}
|
||||
|
||||
mb_index++;
|
||||
fprintf(mvs, "\n");
|
||||
}
|
||||
|
||||
fprintf(mvs, "\n");
|
||||
|
||||
|
||||
/* print out the block modes */
|
||||
fprintf(mvs, "MVs for Frame %d\n", frame);
|
||||
{
|
||||
int b_row;
|
||||
|
||||
for (b_row = 0; b_row < 4 * rows; b_row++)
|
||||
{
|
||||
int b_col;
|
||||
int bindex;
|
||||
|
||||
for (b_col = 0; b_col < 4 * cols; b_col++)
|
||||
{
|
||||
mb_index = (b_row >> 2) * (cols + 1) + (b_col >> 2);
|
||||
bindex = (b_row & 3) * 4 + (b_col & 3);
|
||||
fprintf(mvs, "%3d:%-3d ", mi[mb_index].bmi[bindex].mv.as_mv.row, mi[mb_index].bmi[bindex].mv.as_mv.col);
|
||||
|
||||
}
|
||||
|
||||
fprintf(mvs, "\n");
|
||||
}
|
||||
}
|
||||
fprintf(mvs, "\n");
|
||||
|
||||
|
||||
fclose(mvs);
|
||||
}
|
||||
@@ -1,200 +0,0 @@
|
||||
/*
|
||||
* Copyright (c) 2010 The WebM project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#ifndef VP8_COMMON_DEFAULT_COEF_PROBS_H_
|
||||
#define VP8_COMMON_DEFAULT_COEF_PROBS_H_
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
/*Generated file, included by entropy.c*/
|
||||
|
||||
|
||||
static const vp8_prob default_coef_probs [BLOCK_TYPES]
|
||||
[COEF_BANDS]
|
||||
[PREV_COEF_CONTEXTS]
|
||||
[ENTROPY_NODES] =
|
||||
{
|
||||
{ /* Block Type ( 0 ) */
|
||||
{ /* Coeff Band ( 0 )*/
|
||||
{ 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 },
|
||||
{ 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 },
|
||||
{ 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }
|
||||
},
|
||||
{ /* Coeff Band ( 1 )*/
|
||||
{ 253, 136, 254, 255, 228, 219, 128, 128, 128, 128, 128 },
|
||||
{ 189, 129, 242, 255, 227, 213, 255, 219, 128, 128, 128 },
|
||||
{ 106, 126, 227, 252, 214, 209, 255, 255, 128, 128, 128 }
|
||||
},
|
||||
{ /* Coeff Band ( 2 )*/
|
||||
{ 1, 98, 248, 255, 236, 226, 255, 255, 128, 128, 128 },
|
||||
{ 181, 133, 238, 254, 221, 234, 255, 154, 128, 128, 128 },
|
||||
{ 78, 134, 202, 247, 198, 180, 255, 219, 128, 128, 128 }
|
||||
},
|
||||
{ /* Coeff Band ( 3 )*/
|
||||
{ 1, 185, 249, 255, 243, 255, 128, 128, 128, 128, 128 },
|
||||
{ 184, 150, 247, 255, 236, 224, 128, 128, 128, 128, 128 },
|
||||
{ 77, 110, 216, 255, 236, 230, 128, 128, 128, 128, 128 }
|
||||
},
|
||||
{ /* Coeff Band ( 4 )*/
|
||||
{ 1, 101, 251, 255, 241, 255, 128, 128, 128, 128, 128 },
|
||||
{ 170, 139, 241, 252, 236, 209, 255, 255, 128, 128, 128 },
|
||||
{ 37, 116, 196, 243, 228, 255, 255, 255, 128, 128, 128 }
|
||||
},
|
||||
{ /* Coeff Band ( 5 )*/
|
||||
{ 1, 204, 254, 255, 245, 255, 128, 128, 128, 128, 128 },
|
||||
{ 207, 160, 250, 255, 238, 128, 128, 128, 128, 128, 128 },
|
||||
{ 102, 103, 231, 255, 211, 171, 128, 128, 128, 128, 128 }
|
||||
},
|
||||
{ /* Coeff Band ( 6 )*/
|
||||
{ 1, 152, 252, 255, 240, 255, 128, 128, 128, 128, 128 },
|
||||
{ 177, 135, 243, 255, 234, 225, 128, 128, 128, 128, 128 },
|
||||
{ 80, 129, 211, 255, 194, 224, 128, 128, 128, 128, 128 }
|
||||
},
|
||||
{ /* Coeff Band ( 7 )*/
|
||||
{ 1, 1, 255, 128, 128, 128, 128, 128, 128, 128, 128 },
|
||||
{ 246, 1, 255, 128, 128, 128, 128, 128, 128, 128, 128 },
|
||||
{ 255, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }
|
||||
}
|
||||
},
|
||||
{ /* Block Type ( 1 ) */
|
||||
{ /* Coeff Band ( 0 )*/
|
||||
{ 198, 35, 237, 223, 193, 187, 162, 160, 145, 155, 62 },
|
||||
{ 131, 45, 198, 221, 172, 176, 220, 157, 252, 221, 1 },
|
||||
{ 68, 47, 146, 208, 149, 167, 221, 162, 255, 223, 128 }
|
||||
},
|
||||
{ /* Coeff Band ( 1 )*/
|
||||
{ 1, 149, 241, 255, 221, 224, 255, 255, 128, 128, 128 },
|
||||
{ 184, 141, 234, 253, 222, 220, 255, 199, 128, 128, 128 },
|
||||
{ 81, 99, 181, 242, 176, 190, 249, 202, 255, 255, 128 }
|
||||
},
|
||||
{ /* Coeff Band ( 2 )*/
|
||||
{ 1, 129, 232, 253, 214, 197, 242, 196, 255, 255, 128 },
|
||||
{ 99, 121, 210, 250, 201, 198, 255, 202, 128, 128, 128 },
|
||||
{ 23, 91, 163, 242, 170, 187, 247, 210, 255, 255, 128 }
|
||||
},
|
||||
{ /* Coeff Band ( 3 )*/
|
||||
{ 1, 200, 246, 255, 234, 255, 128, 128, 128, 128, 128 },
|
||||
{ 109, 178, 241, 255, 231, 245, 255, 255, 128, 128, 128 },
|
||||
{ 44, 130, 201, 253, 205, 192, 255, 255, 128, 128, 128 }
|
||||
},
|
||||
{ /* Coeff Band ( 4 )*/
|
||||
{ 1, 132, 239, 251, 219, 209, 255, 165, 128, 128, 128 },
|
||||
{ 94, 136, 225, 251, 218, 190, 255, 255, 128, 128, 128 },
|
||||
{ 22, 100, 174, 245, 186, 161, 255, 199, 128, 128, 128 }
|
||||
},
|
||||
{ /* Coeff Band ( 5 )*/
|
||||
{ 1, 182, 249, 255, 232, 235, 128, 128, 128, 128, 128 },
|
||||
{ 124, 143, 241, 255, 227, 234, 128, 128, 128, 128, 128 },
|
||||
{ 35, 77, 181, 251, 193, 211, 255, 205, 128, 128, 128 }
|
||||
},
|
||||
{ /* Coeff Band ( 6 )*/
|
||||
{ 1, 157, 247, 255, 236, 231, 255, 255, 128, 128, 128 },
|
||||
{ 121, 141, 235, 255, 225, 227, 255, 255, 128, 128, 128 },
|
||||
{ 45, 99, 188, 251, 195, 217, 255, 224, 128, 128, 128 }
|
||||
},
|
||||
{ /* Coeff Band ( 7 )*/
|
||||
{ 1, 1, 251, 255, 213, 255, 128, 128, 128, 128, 128 },
|
||||
{ 203, 1, 248, 255, 255, 128, 128, 128, 128, 128, 128 },
|
||||
{ 137, 1, 177, 255, 224, 255, 128, 128, 128, 128, 128 }
|
||||
}
|
||||
},
|
||||
{ /* Block Type ( 2 ) */
|
||||
{ /* Coeff Band ( 0 )*/
|
||||
{ 253, 9, 248, 251, 207, 208, 255, 192, 128, 128, 128 },
|
||||
{ 175, 13, 224, 243, 193, 185, 249, 198, 255, 255, 128 },
|
||||
{ 73, 17, 171, 221, 161, 179, 236, 167, 255, 234, 128 }
|
||||
},
|
||||
{ /* Coeff Band ( 1 )*/
|
||||
{ 1, 95, 247, 253, 212, 183, 255, 255, 128, 128, 128 },
|
||||
{ 239, 90, 244, 250, 211, 209, 255, 255, 128, 128, 128 },
|
||||
{ 155, 77, 195, 248, 188, 195, 255, 255, 128, 128, 128 }
|
||||
},
|
||||
{ /* Coeff Band ( 2 )*/
|
||||
{ 1, 24, 239, 251, 218, 219, 255, 205, 128, 128, 128 },
|
||||
{ 201, 51, 219, 255, 196, 186, 128, 128, 128, 128, 128 },
|
||||
{ 69, 46, 190, 239, 201, 218, 255, 228, 128, 128, 128 }
|
||||
},
|
||||
{ /* Coeff Band ( 3 )*/
|
||||
{ 1, 191, 251, 255, 255, 128, 128, 128, 128, 128, 128 },
|
||||
{ 223, 165, 249, 255, 213, 255, 128, 128, 128, 128, 128 },
|
||||
{ 141, 124, 248, 255, 255, 128, 128, 128, 128, 128, 128 }
|
||||
},
|
||||
{ /* Coeff Band ( 4 )*/
|
||||
{ 1, 16, 248, 255, 255, 128, 128, 128, 128, 128, 128 },
|
||||
{ 190, 36, 230, 255, 236, 255, 128, 128, 128, 128, 128 },
|
||||
{ 149, 1, 255, 128, 128, 128, 128, 128, 128, 128, 128 }
|
||||
},
|
||||
{ /* Coeff Band ( 5 )*/
|
||||
{ 1, 226, 255, 128, 128, 128, 128, 128, 128, 128, 128 },
|
||||
{ 247, 192, 255, 128, 128, 128, 128, 128, 128, 128, 128 },
|
||||
{ 240, 128, 255, 128, 128, 128, 128, 128, 128, 128, 128 }
|
||||
},
|
||||
{ /* Coeff Band ( 6 )*/
|
||||
{ 1, 134, 252, 255, 255, 128, 128, 128, 128, 128, 128 },
|
||||
{ 213, 62, 250, 255, 255, 128, 128, 128, 128, 128, 128 },
|
||||
{ 55, 93, 255, 128, 128, 128, 128, 128, 128, 128, 128 }
|
||||
},
|
||||
{ /* Coeff Band ( 7 )*/
|
||||
{ 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 },
|
||||
{ 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 },
|
||||
{ 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }
|
||||
}
|
||||
},
|
||||
{ /* Block Type ( 3 ) */
|
||||
{ /* Coeff Band ( 0 )*/
|
||||
{ 202, 24, 213, 235, 186, 191, 220, 160, 240, 175, 255 },
|
||||
{ 126, 38, 182, 232, 169, 184, 228, 174, 255, 187, 128 },
|
||||
{ 61, 46, 138, 219, 151, 178, 240, 170, 255, 216, 128 }
|
||||
},
|
||||
{ /* Coeff Band ( 1 )*/
|
||||
{ 1, 112, 230, 250, 199, 191, 247, 159, 255, 255, 128 },
|
||||
{ 166, 109, 228, 252, 211, 215, 255, 174, 128, 128, 128 },
|
||||
{ 39, 77, 162, 232, 172, 180, 245, 178, 255, 255, 128 }
|
||||
},
|
||||
{ /* Coeff Band ( 2 )*/
|
||||
{ 1, 52, 220, 246, 198, 199, 249, 220, 255, 255, 128 },
|
||||
{ 124, 74, 191, 243, 183, 193, 250, 221, 255, 255, 128 },
|
||||
{ 24, 71, 130, 219, 154, 170, 243, 182, 255, 255, 128 }
|
||||
},
|
||||
{ /* Coeff Band ( 3 )*/
|
||||
{ 1, 182, 225, 249, 219, 240, 255, 224, 128, 128, 128 },
|
||||
{ 149, 150, 226, 252, 216, 205, 255, 171, 128, 128, 128 },
|
||||
{ 28, 108, 170, 242, 183, 194, 254, 223, 255, 255, 128 }
|
||||
},
|
||||
{ /* Coeff Band ( 4 )*/
|
||||
{ 1, 81, 230, 252, 204, 203, 255, 192, 128, 128, 128 },
|
||||
{ 123, 102, 209, 247, 188, 196, 255, 233, 128, 128, 128 },
|
||||
{ 20, 95, 153, 243, 164, 173, 255, 203, 128, 128, 128 }
|
||||
},
|
||||
{ /* Coeff Band ( 5 )*/
|
||||
{ 1, 222, 248, 255, 216, 213, 128, 128, 128, 128, 128 },
|
||||
{ 168, 175, 246, 252, 235, 205, 255, 255, 128, 128, 128 },
|
||||
{ 47, 116, 215, 255, 211, 212, 255, 255, 128, 128, 128 }
|
||||
},
|
||||
{ /* Coeff Band ( 6 )*/
|
||||
{ 1, 121, 236, 253, 212, 214, 255, 255, 128, 128, 128 },
|
||||
{ 141, 84, 213, 252, 201, 202, 255, 219, 128, 128, 128 },
|
||||
{ 42, 80, 160, 240, 162, 185, 255, 205, 128, 128, 128 }
|
||||
},
|
||||
{ /* Coeff Band ( 7 )*/
|
||||
{ 1, 1, 255, 128, 128, 128, 128, 128, 128, 128, 128 },
|
||||
{ 244, 1, 255, 128, 128, 128, 128, 128, 128, 128, 128 },
|
||||
{ 238, 1, 255, 128, 128, 128, 128, 128, 128, 128, 128 }
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
#ifdef __cplusplus
|
||||
} // extern "C"
|
||||
#endif
|
||||
|
||||
#endif // VP8_COMMON_DEFAULT_COEF_PROBS_H_
|
||||
@@ -1,43 +0,0 @@
/*
 * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
 *
 * Use of this source code is governed by a BSD-style license
 * that can be found in the LICENSE file in the root of the source
 * tree. An additional intellectual property rights grant can be found
 * in the file PATENTS. All contributing project authors may
 * be found in the AUTHORS file in the root of the source tree.
 */


#include "vpx_config.h"
#include "vp8_rtcd.h"
#include "vp8/common/blockd.h"
#include "vpx_mem/vpx_mem.h"

void vp8_dequantize_b_c(BLOCKD *d, short *DQC)
{
    int i;
    short *DQ = d->dqcoeff;
    short *Q = d->qcoeff;

    for (i = 0; i < 16; i++)
    {
        DQ[i] = Q[i] * DQC[i];
    }
}

void vp8_dequant_idct_add_c(short *input, short *dq,
                            unsigned char *dest, int stride)
{
    int i;

    for (i = 0; i < 16; i++)
    {
        input[i] = dq[i] * input[i];
    }

    vp8_short_idct4x4llm_c(input, dest, stride, dest, stride);

    memset(input, 0, 32);

}
@@ -1,188 +0,0 @@
|
||||
/*
|
||||
* Copyright (c) 2010 The WebM project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#include "entropy.h"
|
||||
#include "blockd.h"
|
||||
#include "onyxc_int.h"
|
||||
#include "vpx_mem/vpx_mem.h"
|
||||
|
||||
#include "coefupdateprobs.h"
|
||||
|
||||
DECLARE_ALIGNED(16, const unsigned char, vp8_norm[256]) =
|
||||
{
|
||||
0, 7, 6, 6, 5, 5, 5, 5, 4, 4, 4, 4, 4, 4, 4, 4,
|
||||
3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
|
||||
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
|
||||
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
|
||||
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
||||
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
||||
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
||||
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
|
||||
};
|
||||
|
||||
DECLARE_ALIGNED(16, const unsigned char, vp8_coef_bands[16]) =
|
||||
{ 0, 1, 2, 3, 6, 4, 5, 6, 6, 6, 6, 6, 6, 6, 6, 7};
|
||||
|
||||
DECLARE_ALIGNED(16, const unsigned char,
|
||||
vp8_prev_token_class[MAX_ENTROPY_TOKENS]) =
|
||||
{ 0, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0};
|
||||
|
||||
DECLARE_ALIGNED(16, const int, vp8_default_zig_zag1d[16]) =
|
||||
{
|
||||
0, 1, 4, 8,
|
||||
5, 2, 3, 6,
|
||||
9, 12, 13, 10,
|
||||
7, 11, 14, 15,
|
||||
};
|
||||
|
||||
DECLARE_ALIGNED(16, const short, vp8_default_inv_zig_zag[16]) =
|
||||
{
|
||||
1, 2, 6, 7,
|
||||
3, 5, 8, 13,
|
||||
4, 9, 12, 14,
|
||||
10, 11, 15, 16
|
||||
};
|
||||
|
||||
/* vp8_default_zig_zag_mask generated with:
|
||||
|
||||
void vp8_init_scan_order_mask()
|
||||
{
|
||||
int i;
|
||||
|
||||
for (i = 0; i < 16; i++)
|
||||
{
|
||||
vp8_default_zig_zag_mask[vp8_default_zig_zag1d[i]] = 1 << i;
|
||||
}
|
||||
|
||||
}
|
||||
*/
|
||||
DECLARE_ALIGNED(16, const short, vp8_default_zig_zag_mask[16]) =
|
||||
{
|
||||
1, 2, 32, 64,
|
||||
4, 16, 128, 4096,
|
||||
8, 256, 2048, 8192,
|
||||
512, 1024, 16384, -32768
|
||||
};
|
||||
|
||||
const int vp8_mb_feature_data_bits[MB_LVL_MAX] = {7, 6};
|
||||
|
||||
/* Array indices are identical to previously-existing CONTEXT_NODE indices */
|
||||
|
||||
const vp8_tree_index vp8_coef_tree[ 22] = /* corresponding _CONTEXT_NODEs */
|
||||
{
|
||||
-DCT_EOB_TOKEN, 2, /* 0 = EOB */
|
||||
-ZERO_TOKEN, 4, /* 1 = ZERO */
|
||||
-ONE_TOKEN, 6, /* 2 = ONE */
|
||||
8, 12, /* 3 = LOW_VAL */
|
||||
-TWO_TOKEN, 10, /* 4 = TWO */
|
||||
-THREE_TOKEN, -FOUR_TOKEN, /* 5 = THREE */
|
||||
14, 16, /* 6 = HIGH_LOW */
|
||||
-DCT_VAL_CATEGORY1, -DCT_VAL_CATEGORY2, /* 7 = CAT_ONE */
|
||||
18, 20, /* 8 = CAT_THREEFOUR */
|
||||
-DCT_VAL_CATEGORY3, -DCT_VAL_CATEGORY4, /* 9 = CAT_THREE */
|
||||
-DCT_VAL_CATEGORY5, -DCT_VAL_CATEGORY6 /* 10 = CAT_FIVE */
|
||||
};
|
||||
|
||||
/* vp8_coef_encodings generated with:
|
||||
vp8_tokens_from_tree(vp8_coef_encodings, vp8_coef_tree);
|
||||
*/
|
||||
vp8_token vp8_coef_encodings[MAX_ENTROPY_TOKENS] =
|
||||
{
|
||||
{2, 2},
|
||||
{6, 3},
|
||||
{28, 5},
|
||||
{58, 6},
|
||||
{59, 6},
|
||||
{60, 6},
|
||||
{61, 6},
|
||||
{124, 7},
|
||||
{125, 7},
|
||||
{126, 7},
|
||||
{127, 7},
|
||||
{0, 1}
|
||||
};
|
||||
|
||||
/* Trees for extra bits. Probabilities are constant and
|
||||
do not depend on previously encoded bits */
|
||||
|
||||
static const vp8_prob Pcat1[] = { 159};
|
||||
static const vp8_prob Pcat2[] = { 165, 145};
|
||||
static const vp8_prob Pcat3[] = { 173, 148, 140};
|
||||
static const vp8_prob Pcat4[] = { 176, 155, 140, 135};
|
||||
static const vp8_prob Pcat5[] = { 180, 157, 141, 134, 130};
|
||||
static const vp8_prob Pcat6[] =
|
||||
{ 254, 254, 243, 230, 196, 177, 153, 140, 133, 130, 129};
|
||||
|
||||
|
||||
/* tree index tables generated with:
|
||||
|
||||
void init_bit_tree(vp8_tree_index *p, int n)
|
||||
{
|
||||
int i = 0;
|
||||
|
||||
while (++i < n)
|
||||
{
|
||||
p[0] = p[1] = i << 1;
|
||||
p += 2;
|
||||
}
|
||||
|
||||
p[0] = p[1] = 0;
|
||||
}
|
||||
|
||||
void init_bit_trees()
|
||||
{
|
||||
init_bit_tree(cat1, 1);
|
||||
init_bit_tree(cat2, 2);
|
||||
init_bit_tree(cat3, 3);
|
||||
init_bit_tree(cat4, 4);
|
||||
init_bit_tree(cat5, 5);
|
||||
init_bit_tree(cat6, 11);
|
||||
}
|
||||
*/
|
||||
|
||||
static const vp8_tree_index cat1[2] = { 0, 0 };
|
||||
static const vp8_tree_index cat2[4] = { 2, 2, 0, 0 };
|
||||
static const vp8_tree_index cat3[6] = { 2, 2, 4, 4, 0, 0 };
|
||||
static const vp8_tree_index cat4[8] = { 2, 2, 4, 4, 6, 6, 0, 0 };
|
||||
static const vp8_tree_index cat5[10] = { 2, 2, 4, 4, 6, 6, 8, 8, 0, 0 };
|
||||
static const vp8_tree_index cat6[22] = { 2, 2, 4, 4, 6, 6, 8, 8, 10, 10, 12, 12,
|
||||
14, 14, 16, 16, 18, 18, 20, 20, 0, 0 };
|
||||
|
||||
const vp8_extra_bit_struct vp8_extra_bits[12] =
|
||||
{
|
||||
{ 0, 0, 0, 0},
|
||||
{ 0, 0, 0, 1},
|
||||
{ 0, 0, 0, 2},
|
||||
{ 0, 0, 0, 3},
|
||||
{ 0, 0, 0, 4},
|
||||
{ cat1, Pcat1, 1, 5},
|
||||
{ cat2, Pcat2, 2, 7},
|
||||
{ cat3, Pcat3, 3, 11},
|
||||
{ cat4, Pcat4, 4, 19},
|
||||
{ cat5, Pcat5, 5, 35},
|
||||
{ cat6, Pcat6, 11, 67},
|
||||
{ 0, 0, 0, 0}
|
||||
};
|
||||
|
||||
#include "default_coef_probs.h"
|
||||
|
||||
void vp8_default_coef_probs(VP8_COMMON *pc)
|
||||
{
|
||||
memcpy(pc->fc.coef_probs, default_coef_probs, sizeof(default_coef_probs));
|
||||
}
|
||||
|
||||
@@ -1,109 +0,0 @@
|
||||
/*
|
||||
* Copyright (c) 2010 The WebM project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
|
||||
#ifndef VP8_COMMON_ENTROPY_H_
|
||||
#define VP8_COMMON_ENTROPY_H_
|
||||
|
||||
#include "treecoder.h"
|
||||
#include "blockd.h"
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
/* Coefficient token alphabet */
|
||||
|
||||
#define ZERO_TOKEN 0 /* 0 Extra Bits 0+0 */
|
||||
#define ONE_TOKEN 1 /* 1 Extra Bits 0+1 */
|
||||
#define TWO_TOKEN 2 /* 2 Extra Bits 0+1 */
|
||||
#define THREE_TOKEN 3 /* 3 Extra Bits 0+1 */
|
||||
#define FOUR_TOKEN 4 /* 4 Extra Bits 0+1 */
|
||||
#define DCT_VAL_CATEGORY1 5 /* 5-6 Extra Bits 1+1 */
|
||||
#define DCT_VAL_CATEGORY2 6 /* 7-10 Extra Bits 2+1 */
|
||||
#define DCT_VAL_CATEGORY3 7 /* 11-18 Extra Bits 3+1 */
|
||||
#define DCT_VAL_CATEGORY4 8 /* 19-34 Extra Bits 4+1 */
|
||||
#define DCT_VAL_CATEGORY5 9 /* 35-66 Extra Bits 5+1 */
|
||||
#define DCT_VAL_CATEGORY6 10 /* 67+ Extra Bits 11+1 */
|
||||
#define DCT_EOB_TOKEN 11 /* EOB Extra Bits 0+0 */
|
||||
|
||||
#define MAX_ENTROPY_TOKENS 12
|
||||
#define ENTROPY_NODES 11
|
||||
|
||||
extern const vp8_tree_index vp8_coef_tree[];
|
||||
|
||||
extern const struct vp8_token_struct vp8_coef_encodings[MAX_ENTROPY_TOKENS];
|
||||
|
||||
typedef struct
|
||||
{
|
||||
vp8_tree_p tree;
|
||||
const vp8_prob *prob;
|
||||
int Len;
|
||||
int base_val;
|
||||
} vp8_extra_bit_struct;
|
||||
|
||||
extern const vp8_extra_bit_struct vp8_extra_bits[12]; /* indexed by token value */
|
||||
|
||||
#define PROB_UPDATE_BASELINE_COST 7
|
||||
|
||||
#define MAX_PROB 255
|
||||
#define DCT_MAX_VALUE 2048
|
||||
|
||||
|
||||
/* Coefficients are predicted via a 3-dimensional probability table. */
|
||||
|
||||
/* Outside dimension. 0 = Y no DC, 1 = Y2, 2 = UV, 3 = Y with DC */
|
||||
|
||||
#define BLOCK_TYPES 4
|
||||
|
||||
/* Middle dimension is a coarsening of the coefficient's
|
||||
position within the 4x4 DCT. */
|
||||
|
||||
#define COEF_BANDS 8
|
||||
extern DECLARE_ALIGNED(16, const unsigned char, vp8_coef_bands[16]);
|
||||
|
||||
/* Inside dimension is 3-valued measure of nearby complexity, that is,
|
||||
the extent to which nearby coefficients are nonzero. For the first
|
||||
coefficient (DC, unless block type is 0), we look at the (already encoded)
|
||||
blocks above and to the left of the current block. The context index is
|
||||
then the number (0,1,or 2) of these blocks having nonzero coefficients.
|
||||
After decoding a coefficient, the measure is roughly the size of the
|
||||
most recently decoded coefficient (0 for 0, 1 for 1, 2 for >1).
|
||||
Note that the intuitive meaning of this measure changes as coefficients
|
||||
are decoded, e.g., prior to the first token, a zero means that my neighbors
|
||||
are empty while, after the first token, because of the use of end-of-block,
|
||||
a zero means we just decoded a zero and hence guarantees that a non-zero
|
||||
coefficient will appear later in this block. However, this shift
|
||||
in meaning is perfectly OK because our context depends also on the
|
||||
coefficient band (and since zigzag positions 0, 1, and 2 are in
|
||||
distinct bands). */
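In code form, the inside dimension described above reduces, for the first coefficient of a block, to counting nonzero neighbours (an illustrative helper, not from the original header):

static int first_coeff_context(int above_has_nonzero, int left_has_nonzero)
{
    return (above_has_nonzero != 0) + (left_has_nonzero != 0);   /* 0, 1 or 2 */
}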
|
||||
|
||||
/*# define DC_TOKEN_CONTEXTS 3*/ /* 00, 0!0, !0!0 */
|
||||
# define PREV_COEF_CONTEXTS 3
|
||||
|
||||
extern DECLARE_ALIGNED(16, const unsigned char, vp8_prev_token_class[MAX_ENTROPY_TOKENS]);
|
||||
|
||||
extern const vp8_prob vp8_coef_update_probs [BLOCK_TYPES] [COEF_BANDS] [PREV_COEF_CONTEXTS] [ENTROPY_NODES];
|
||||
|
||||
|
||||
struct VP8Common;
|
||||
void vp8_default_coef_probs(struct VP8Common *);
|
||||
|
||||
extern DECLARE_ALIGNED(16, const int, vp8_default_zig_zag1d[16]);
|
||||
extern DECLARE_ALIGNED(16, const short, vp8_default_inv_zig_zag[16]);
|
||||
extern DECLARE_ALIGNED(16, const short, vp8_default_zig_zag_mask[16]);
|
||||
extern const int vp8_mb_feature_data_bits[MB_LVL_MAX];
|
||||
|
||||
void vp8_coef_tree_initialize(void);
|
||||
#ifdef __cplusplus
|
||||
} // extern "C"
|
||||
#endif
|
||||
|
||||
#endif // VP8_COMMON_ENTROPY_H_
|
||||
@@ -1,171 +0,0 @@
|
||||
/*
|
||||
* Copyright (c) 2010 The WebM project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#define USE_PREBUILT_TABLES
|
||||
|
||||
#include "entropymode.h"
|
||||
#include "entropy.h"
|
||||
#include "vpx_mem/vpx_mem.h"
|
||||
|
||||
#include "vp8_entropymodedata.h"
|
||||
|
||||
int vp8_mv_cont(const int_mv *l, const int_mv *a)
|
||||
{
|
||||
int lez = (l->as_int == 0);
|
||||
int aez = (a->as_int == 0);
|
||||
int lea = (l->as_int == a->as_int);
|
||||
|
||||
if (lea && lez)
|
||||
return SUBMVREF_LEFT_ABOVE_ZED;
|
||||
|
||||
if (lea)
|
||||
return SUBMVREF_LEFT_ABOVE_SAME;
|
||||
|
||||
if (aez)
|
||||
return SUBMVREF_ABOVE_ZED;
|
||||
|
||||
if (lez)
|
||||
return SUBMVREF_LEFT_ZED;
|
||||
|
||||
return SUBMVREF_NORMAL;
|
||||
}
|
||||
|
||||
static const vp8_prob sub_mv_ref_prob [VP8_SUBMVREFS-1] = { 180, 162, 25};
|
||||
|
||||
const vp8_prob vp8_sub_mv_ref_prob2 [SUBMVREF_COUNT][VP8_SUBMVREFS-1] =
|
||||
{
|
||||
{ 147, 136, 18 },
|
||||
{ 106, 145, 1 },
|
||||
{ 179, 121, 1 },
|
||||
{ 223, 1 , 34 },
|
||||
{ 208, 1 , 1 }
|
||||
};
|
||||
|
||||
|
||||
|
||||
const vp8_mbsplit vp8_mbsplits [VP8_NUMMBSPLITS] =
|
||||
{
|
||||
{
|
||||
0, 0, 0, 0,
|
||||
0, 0, 0, 0,
|
||||
1, 1, 1, 1,
|
||||
1, 1, 1, 1,
|
||||
},
|
||||
{
|
||||
0, 0, 1, 1,
|
||||
0, 0, 1, 1,
|
||||
0, 0, 1, 1,
|
||||
0, 0, 1, 1,
|
||||
},
|
||||
{
|
||||
0, 0, 1, 1,
|
||||
0, 0, 1, 1,
|
||||
2, 2, 3, 3,
|
||||
2, 2, 3, 3,
|
||||
},
|
||||
{
|
||||
0, 1, 2, 3,
|
||||
4, 5, 6, 7,
|
||||
8, 9, 10, 11,
|
||||
12, 13, 14, 15,
|
||||
}
|
||||
};
|
||||
|
||||
const int vp8_mbsplit_count [VP8_NUMMBSPLITS] = { 2, 2, 4, 16};
|
||||
|
||||
const vp8_prob vp8_mbsplit_probs [VP8_NUMMBSPLITS-1] = { 110, 111, 150};
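Scanning any of the vp8_mbsplits patterns above in raster order gives the first 4x4 subblock of each partition, which is what vp8_mbsplit_offset in findnearmv.c tabulates ({0, 8}, {0, 2}, {0, 2, 8, 10}, 0..15). A hypothetical helper showing the relationship, illustrative only:

static int first_subblock_of_partition(const vp8_mbsplit split, int k)
{
    int i;

    for (i = 0; i < 16; i++)
        if (split[i] == k)
            return i;

    return 0;   /* not reached for a valid partition index */
}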
|
||||
|
||||
|
||||
/* Array indices are identical to previously-existing INTRAMODECONTEXTNODES. */
|
||||
|
||||
const vp8_tree_index vp8_bmode_tree[18] = /* INTRAMODECONTEXTNODE value */
|
||||
{
|
||||
-B_DC_PRED, 2, /* 0 = DC_NODE */
|
||||
-B_TM_PRED, 4, /* 1 = TM_NODE */
|
||||
-B_VE_PRED, 6, /* 2 = VE_NODE */
|
||||
8, 12, /* 3 = COM_NODE */
|
||||
-B_HE_PRED, 10, /* 4 = HE_NODE */
|
||||
-B_RD_PRED, -B_VR_PRED, /* 5 = RD_NODE */
|
||||
-B_LD_PRED, 14, /* 6 = LD_NODE */
|
||||
-B_VL_PRED, 16, /* 7 = VL_NODE */
|
||||
-B_HD_PRED, -B_HU_PRED /* 8 = HD_NODE */
|
||||
};
|
||||
|
||||
/* Again, these trees use the same probability indices as their
|
||||
explicitly-programmed predecessors. */
|
||||
|
||||
const vp8_tree_index vp8_ymode_tree[8] =
|
||||
{
|
||||
-DC_PRED, 2,
|
||||
4, 6,
|
||||
-V_PRED, -H_PRED,
|
||||
-TM_PRED, -B_PRED
|
||||
};
|
||||
|
||||
const vp8_tree_index vp8_kf_ymode_tree[8] =
|
||||
{
|
||||
-B_PRED, 2,
|
||||
4, 6,
|
||||
-DC_PRED, -V_PRED,
|
||||
-H_PRED, -TM_PRED
|
||||
};
|
||||
|
||||
const vp8_tree_index vp8_uv_mode_tree[6] =
|
||||
{
|
||||
-DC_PRED, 2,
|
||||
-V_PRED, 4,
|
||||
-H_PRED, -TM_PRED
|
||||
};
|
||||
|
||||
const vp8_tree_index vp8_mbsplit_tree[6] =
|
||||
{
|
||||
-3, 2,
|
||||
-2, 4,
|
||||
-0, -1
|
||||
};
|
||||
|
||||
const vp8_tree_index vp8_mv_ref_tree[8] =
|
||||
{
|
||||
-ZEROMV, 2,
|
||||
-NEARESTMV, 4,
|
||||
-NEARMV, 6,
|
||||
-NEWMV, -SPLITMV
|
||||
};
|
||||
|
||||
const vp8_tree_index vp8_sub_mv_ref_tree[6] =
|
||||
{
|
||||
-LEFT4X4, 2,
|
||||
-ABOVE4X4, 4,
|
||||
-ZERO4X4, -NEW4X4
|
||||
};
|
||||
|
||||
const vp8_tree_index vp8_small_mvtree [14] =
|
||||
{
|
||||
2, 8,
|
||||
4, 6,
|
||||
-0, -1,
|
||||
-2, -3,
|
||||
10, 12,
|
||||
-4, -5,
|
||||
-6, -7
|
||||
};
|
||||
|
||||
void vp8_init_mbmode_probs(VP8_COMMON *x)
|
||||
{
|
||||
memcpy(x->fc.ymode_prob, vp8_ymode_prob, sizeof(vp8_ymode_prob));
|
||||
memcpy(x->fc.uv_mode_prob, vp8_uv_mode_prob, sizeof(vp8_uv_mode_prob));
|
||||
memcpy(x->fc.sub_mv_ref_prob, sub_mv_ref_prob, sizeof(sub_mv_ref_prob));
|
||||
}
|
||||
|
||||
void vp8_default_bmode_probs(vp8_prob p [VP8_BINTRAMODES-1])
|
||||
{
|
||||
memcpy(p, vp8_bmode_prob, sizeof(vp8_bmode_prob));
|
||||
}
|
||||
|
||||
@@ -1,88 +0,0 @@
|
||||
/*
|
||||
* Copyright (c) 2010 The WebM project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
|
||||
#ifndef VP8_COMMON_ENTROPYMODE_H_
|
||||
#define VP8_COMMON_ENTROPYMODE_H_
|
||||
|
||||
#include "onyxc_int.h"
|
||||
#include "treecoder.h"
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
typedef enum
|
||||
{
|
||||
SUBMVREF_NORMAL,
|
||||
SUBMVREF_LEFT_ZED,
|
||||
SUBMVREF_ABOVE_ZED,
|
||||
SUBMVREF_LEFT_ABOVE_SAME,
|
||||
SUBMVREF_LEFT_ABOVE_ZED
|
||||
} sumvfref_t;
|
||||
|
||||
typedef int vp8_mbsplit[16];
|
||||
|
||||
#define VP8_NUMMBSPLITS 4
|
||||
|
||||
extern const vp8_mbsplit vp8_mbsplits [VP8_NUMMBSPLITS];
|
||||
|
||||
extern const int vp8_mbsplit_count [VP8_NUMMBSPLITS]; /* # of subsets */
|
||||
|
||||
extern const vp8_prob vp8_mbsplit_probs [VP8_NUMMBSPLITS-1];
|
||||
|
||||
extern int vp8_mv_cont(const int_mv *l, const int_mv *a);
|
||||
#define SUBMVREF_COUNT 5
|
||||
extern const vp8_prob vp8_sub_mv_ref_prob2 [SUBMVREF_COUNT][VP8_SUBMVREFS-1];
|
||||
|
||||
|
||||
extern const unsigned int vp8_kf_default_bmode_counts [VP8_BINTRAMODES] [VP8_BINTRAMODES] [VP8_BINTRAMODES];
|
||||
|
||||
|
||||
extern const vp8_tree_index vp8_bmode_tree[];
|
||||
|
||||
extern const vp8_tree_index vp8_ymode_tree[];
|
||||
extern const vp8_tree_index vp8_kf_ymode_tree[];
|
||||
extern const vp8_tree_index vp8_uv_mode_tree[];
|
||||
|
||||
extern const vp8_tree_index vp8_mbsplit_tree[];
|
||||
extern const vp8_tree_index vp8_mv_ref_tree[];
|
||||
extern const vp8_tree_index vp8_sub_mv_ref_tree[];
|
||||
|
||||
extern const struct vp8_token_struct vp8_bmode_encodings[VP8_BINTRAMODES];
|
||||
extern const struct vp8_token_struct vp8_ymode_encodings[VP8_YMODES];
|
||||
extern const struct vp8_token_struct vp8_kf_ymode_encodings[VP8_YMODES];
|
||||
extern const struct vp8_token_struct vp8_uv_mode_encodings[VP8_UV_MODES];
|
||||
extern const struct vp8_token_struct vp8_mbsplit_encodings[VP8_NUMMBSPLITS];
|
||||
|
||||
/* Inter mode values do not start at zero */
|
||||
|
||||
extern const struct vp8_token_struct vp8_mv_ref_encoding_array[VP8_MVREFS];
|
||||
extern const struct vp8_token_struct vp8_sub_mv_ref_encoding_array[VP8_SUBMVREFS];
|
||||
|
||||
extern const vp8_tree_index vp8_small_mvtree[];
|
||||
|
||||
extern const struct vp8_token_struct vp8_small_mvencodings[8];
|
||||
|
||||
/* Key frame default mode probs */
|
||||
extern const vp8_prob vp8_kf_bmode_prob[VP8_BINTRAMODES][VP8_BINTRAMODES]
|
||||
[VP8_BINTRAMODES-1];
|
||||
extern const vp8_prob vp8_kf_uv_mode_prob[VP8_UV_MODES-1];
|
||||
extern const vp8_prob vp8_kf_ymode_prob[VP8_YMODES-1];
|
||||
|
||||
void vp8_init_mbmode_probs(VP8_COMMON *x);
|
||||
void vp8_default_bmode_probs(vp8_prob dest [VP8_BINTRAMODES-1]);
|
||||
void vp8_kf_default_bmode_probs(vp8_prob dest [VP8_BINTRAMODES] [VP8_BINTRAMODES] [VP8_BINTRAMODES-1]);
|
||||
|
||||
#ifdef __cplusplus
|
||||
} // extern "C"
|
||||
#endif
|
||||
|
||||
#endif // VP8_COMMON_ENTROPYMODE_H_
|
||||
@@ -1,49 +0,0 @@
|
||||
/*
|
||||
* Copyright (c) 2010 The WebM project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
|
||||
#include "entropymv.h"
|
||||
|
||||
const MV_CONTEXT vp8_mv_update_probs[2] =
|
||||
{
|
||||
{{
|
||||
237,
|
||||
246,
|
||||
253, 253, 254, 254, 254, 254, 254,
|
||||
254, 254, 254, 254, 254, 250, 250, 252, 254, 254
|
||||
}},
|
||||
{{
|
||||
231,
|
||||
243,
|
||||
245, 253, 254, 254, 254, 254, 254,
|
||||
254, 254, 254, 254, 254, 251, 251, 254, 254, 254
|
||||
}}
|
||||
};
|
||||
const MV_CONTEXT vp8_default_mv_context[2] =
|
||||
{
|
||||
{{
|
||||
/* row */
|
||||
162, /* is short */
|
||||
128, /* sign */
|
||||
225, 146, 172, 147, 214, 39, 156, /* short tree */
|
||||
128, 129, 132, 75, 145, 178, 206, 239, 254, 254 /* long bits */
|
||||
}},
|
||||
|
||||
|
||||
|
||||
{{
|
||||
/* same for column */
|
||||
164, /* is short */
|
||||
128,
|
||||
204, 170, 119, 235, 140, 230, 228,
|
||||
128, 130, 130, 74, 148, 180, 203, 236, 254, 254 /* long bits */
|
||||
|
||||
}}
|
||||
};
|
||||
@@ -1,52 +0,0 @@
|
||||
/*
|
||||
* Copyright (c) 2010 The WebM project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
|
||||
#ifndef VP8_COMMON_ENTROPYMV_H_
|
||||
#define VP8_COMMON_ENTROPYMV_H_
|
||||
|
||||
#include "treecoder.h"
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
enum
|
||||
{
|
||||
mv_max = 1023, /* max absolute value of a MV component */
|
||||
MVvals = (2 * mv_max) + 1, /* # possible values "" */
|
||||
mvfp_max = 255, /* max absolute value of a full pixel MV component */
|
||||
MVfpvals = (2 * mvfp_max) +1, /* # possible full pixel MV values */
|
||||
|
||||
mvlong_width = 10, /* Large MVs have 9 bit magnitudes */
|
||||
mvnum_short = 8, /* magnitudes 0 through 7 */
|
||||
|
||||
/* probability offsets for coding each MV component */
|
||||
|
||||
mvpis_short = 0, /* short (<= 7) vs long (>= 8) */
|
||||
MVPsign, /* sign for non-zero */
|
||||
MVPshort, /* 8 short values = 7-position tree */
|
||||
|
||||
MVPbits = MVPshort + mvnum_short - 1, /* mvlong_width long value bits */
|
||||
MVPcount = MVPbits + mvlong_width /* (with independent probabilities) */
|
||||
};
|
||||
|
||||
typedef struct mv_context
|
||||
{
|
||||
vp8_prob prob[MVPcount]; /* often come in row, col pairs */
|
||||
} MV_CONTEXT;
|
||||
|
||||
extern const MV_CONTEXT vp8_mv_update_probs[2], vp8_default_mv_context[2];
|
||||
|
||||
#ifdef __cplusplus
|
||||
} // extern "C"
|
||||
#endif
|
||||
|
||||
#endif // VP8_COMMON_ENTROPYMV_H_
|
||||
@@ -1,188 +0,0 @@
|
||||
/*
|
||||
* Copyright (c) 2010 The WebM project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
|
||||
#include "extend.h"
|
||||
#include "vpx_mem/vpx_mem.h"
|
||||
|
||||
|
||||
static void copy_and_extend_plane
|
||||
(
|
||||
unsigned char *s, /* source */
|
||||
int sp, /* source pitch */
|
||||
unsigned char *d, /* destination */
|
||||
int dp, /* destination pitch */
|
||||
int h, /* height */
|
||||
int w, /* width */
|
||||
int et, /* extend top border */
|
||||
int el, /* extend left border */
|
||||
int eb, /* extend bottom border */
|
||||
int er /* extend right border */
|
||||
)
|
||||
{
|
||||
int i;
|
||||
unsigned char *src_ptr1, *src_ptr2;
|
||||
unsigned char *dest_ptr1, *dest_ptr2;
|
||||
int linesize;
|
||||
|
||||
/* copy the left and right most columns out */
|
||||
src_ptr1 = s;
|
||||
src_ptr2 = s + w - 1;
|
||||
dest_ptr1 = d - el;
|
||||
dest_ptr2 = d + w;
|
||||
|
||||
for (i = 0; i < h; i++)
|
||||
{
|
||||
memset(dest_ptr1, src_ptr1[0], el);
|
||||
memcpy(dest_ptr1 + el, src_ptr1, w);
|
||||
memset(dest_ptr2, src_ptr2[0], er);
|
||||
src_ptr1 += sp;
|
||||
src_ptr2 += sp;
|
||||
dest_ptr1 += dp;
|
||||
dest_ptr2 += dp;
|
||||
}
|
||||
|
||||
/* Now copy the top and bottom lines into each line of the respective
|
||||
* borders
|
||||
*/
|
||||
src_ptr1 = d - el;
|
||||
src_ptr2 = d + dp * (h - 1) - el;
|
||||
dest_ptr1 = d + dp * (-et) - el;
|
||||
dest_ptr2 = d + dp * (h) - el;
|
||||
linesize = el + er + w;
|
||||
|
||||
for (i = 0; i < et; i++)
|
||||
{
|
||||
memcpy(dest_ptr1, src_ptr1, linesize);
|
||||
dest_ptr1 += dp;
|
||||
}
|
||||
|
||||
for (i = 0; i < eb; i++)
|
||||
{
|
||||
memcpy(dest_ptr2, src_ptr2, linesize);
|
||||
dest_ptr2 += dp;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
void vp8_copy_and_extend_frame(YV12_BUFFER_CONFIG *src,
|
||||
YV12_BUFFER_CONFIG *dst)
|
||||
{
|
||||
int et = dst->border;
|
||||
int el = dst->border;
|
||||
int eb = dst->border + dst->y_height - src->y_height;
|
||||
int er = dst->border + dst->y_width - src->y_width;
|
||||
|
||||
copy_and_extend_plane(src->y_buffer, src->y_stride,
|
||||
dst->y_buffer, dst->y_stride,
|
||||
src->y_height, src->y_width,
|
||||
et, el, eb, er);
|
||||
|
||||
et = dst->border >> 1;
|
||||
el = dst->border >> 1;
|
||||
eb = (dst->border >> 1) + dst->uv_height - src->uv_height;
|
||||
er = (dst->border >> 1) + dst->uv_width - src->uv_width;
|
||||
|
||||
copy_and_extend_plane(src->u_buffer, src->uv_stride,
|
||||
dst->u_buffer, dst->uv_stride,
|
||||
src->uv_height, src->uv_width,
|
||||
et, el, eb, er);
|
||||
|
||||
copy_and_extend_plane(src->v_buffer, src->uv_stride,
|
||||
dst->v_buffer, dst->uv_stride,
|
||||
src->uv_height, src->uv_width,
|
||||
et, el, eb, er);
|
||||
}
|
||||
|
||||
|
||||
void vp8_copy_and_extend_frame_with_rect(YV12_BUFFER_CONFIG *src,
|
||||
YV12_BUFFER_CONFIG *dst,
|
||||
int srcy, int srcx,
|
||||
int srch, int srcw)
|
||||
{
|
||||
int et = dst->border;
|
||||
int el = dst->border;
|
||||
int eb = dst->border + dst->y_height - src->y_height;
|
||||
int er = dst->border + dst->y_width - src->y_width;
|
||||
int src_y_offset = srcy * src->y_stride + srcx;
|
||||
int dst_y_offset = srcy * dst->y_stride + srcx;
|
||||
int src_uv_offset = ((srcy * src->uv_stride) >> 1) + (srcx >> 1);
|
||||
int dst_uv_offset = ((srcy * dst->uv_stride) >> 1) + (srcx >> 1);
|
||||
|
||||
/* If the side is not touching the border then don't extend. */
|
||||
if (srcy)
|
||||
et = 0;
|
||||
if (srcx)
|
||||
el = 0;
|
||||
if (srcy + srch != src->y_height)
|
||||
eb = 0;
|
||||
if (srcx + srcw != src->y_width)
|
||||
er = 0;
|
||||
|
||||
copy_and_extend_plane(src->y_buffer + src_y_offset,
|
||||
src->y_stride,
|
||||
dst->y_buffer + dst_y_offset,
|
||||
dst->y_stride,
|
||||
srch, srcw,
|
||||
et, el, eb, er);
|
||||
|
||||
et = (et + 1) >> 1;
|
||||
el = (el + 1) >> 1;
|
||||
eb = (eb + 1) >> 1;
|
||||
er = (er + 1) >> 1;
|
||||
srch = (srch + 1) >> 1;
|
||||
srcw = (srcw + 1) >> 1;
|
||||
|
||||
copy_and_extend_plane(src->u_buffer + src_uv_offset,
|
||||
src->uv_stride,
|
||||
dst->u_buffer + dst_uv_offset,
|
||||
dst->uv_stride,
|
||||
srch, srcw,
|
||||
et, el, eb, er);
|
||||
|
||||
copy_and_extend_plane(src->v_buffer + src_uv_offset,
|
||||
src->uv_stride,
|
||||
dst->v_buffer + dst_uv_offset,
|
||||
dst->uv_stride,
|
||||
srch, srcw,
|
||||
et, el, eb, er);
|
||||
}
|
||||
|
||||
|
||||
/* note the extension is only for the last row, for intra prediction purpose */
|
||||
void vp8_extend_mb_row(YV12_BUFFER_CONFIG *ybf,
|
||||
unsigned char *YPtr,
|
||||
unsigned char *UPtr,
|
||||
unsigned char *VPtr)
|
||||
{
|
||||
int i;
|
||||
|
||||
YPtr += ybf->y_stride * 14;
|
||||
UPtr += ybf->uv_stride * 6;
|
||||
VPtr += ybf->uv_stride * 6;
|
||||
|
||||
for (i = 0; i < 4; i++)
|
||||
{
|
||||
YPtr[i] = YPtr[-1];
|
||||
UPtr[i] = UPtr[-1];
|
||||
VPtr[i] = VPtr[-1];
|
||||
}
|
||||
|
||||
YPtr += ybf->y_stride;
|
||||
UPtr += ybf->uv_stride;
|
||||
VPtr += ybf->uv_stride;
|
||||
|
||||
for (i = 0; i < 4; i++)
|
||||
{
|
||||
YPtr[i] = YPtr[-1];
|
||||
UPtr[i] = UPtr[-1];
|
||||
VPtr[i] = VPtr[-1];
|
||||
}
|
||||
}
|
||||
@@ -1,33 +0,0 @@
|
||||
/*
|
||||
* Copyright (c) 2010 The WebM project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
|
||||
#ifndef VP8_COMMON_EXTEND_H_
|
||||
#define VP8_COMMON_EXTEND_H_
|
||||
|
||||
#include "vpx_scale/yv12config.h"
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
void vp8_extend_mb_row(YV12_BUFFER_CONFIG *ybf, unsigned char *YPtr, unsigned char *UPtr, unsigned char *VPtr);
|
||||
void vp8_copy_and_extend_frame(YV12_BUFFER_CONFIG *src,
|
||||
YV12_BUFFER_CONFIG *dst);
|
||||
void vp8_copy_and_extend_frame_with_rect(YV12_BUFFER_CONFIG *src,
|
||||
YV12_BUFFER_CONFIG *dst,
|
||||
int srcy, int srcx,
|
||||
int srch, int srcw);
|
||||
|
||||
#ifdef __cplusplus
|
||||
} // extern "C"
|
||||
#endif
|
||||
|
||||
#endif // VP8_COMMON_EXTEND_H_
|
||||
@@ -1,493 +0,0 @@
|
||||
/*
|
||||
* Copyright (c) 2010 The WebM project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
|
||||
#include "filter.h"
|
||||
#include "./vp8_rtcd.h"
|
||||
|
||||
DECLARE_ALIGNED(16, const short, vp8_bilinear_filters[8][2]) =
|
||||
{
|
||||
{ 128, 0 },
|
||||
{ 112, 16 },
|
||||
{ 96, 32 },
|
||||
{ 80, 48 },
|
||||
{ 64, 64 },
|
||||
{ 48, 80 },
|
||||
{ 32, 96 },
|
||||
{ 16, 112 }
|
||||
};
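Each pair of taps above sums to 128, i.e. VP8_FILTER_WEIGHT, so flat areas pass through unchanged; one output pixel at the first sub-pel position would be computed as below (illustrative only, with the rounding and shift used by the filter passes later in this file):

static unsigned char bilinear_pixel_offset1(unsigned char a, unsigned char b)
{
    /* taps {112, 16}; 64 = VP8_FILTER_WEIGHT / 2, 7 = VP8_FILTER_SHIFT */
    return (unsigned char)((a * 112 + b * 16 + 64) >> 7);
}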
|
||||
|
||||
DECLARE_ALIGNED(16, const short, vp8_sub_pel_filters[8][6]) =
|
||||
{
|
||||
|
||||
{ 0, 0, 128, 0, 0, 0 }, /* note that 1/8 pel positions are just as per alpha -0.5 bicubic */
|
||||
{ 0, -6, 123, 12, -1, 0 },
|
||||
{ 2, -11, 108, 36, -8, 1 }, /* New 1/4 pel 6 tap filter */
|
||||
{ 0, -9, 93, 50, -6, 0 },
|
||||
{ 3, -16, 77, 77, -16, 3 }, /* New 1/2 pel 6 tap filter */
|
||||
{ 0, -6, 50, 93, -9, 0 },
|
||||
{ 1, -8, 36, 108, -11, 2 }, /* New 1/4 pel 6 tap filter */
|
||||
{ 0, -1, 12, 123, -6, 0 },
|
||||
};
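The six taps in every row above likewise sum to VP8_FILTER_WEIGHT (128); a tiny editorial check, not part of the removed file:

static int sixtap_row_sum(const short taps[6])
{
    return taps[0] + taps[1] + taps[2] + taps[3] + taps[4] + taps[5];   /* 128 for every row */
}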
|
||||
|
||||
static void filter_block2d_first_pass
|
||||
(
|
||||
unsigned char *src_ptr,
|
||||
int *output_ptr,
|
||||
unsigned int src_pixels_per_line,
|
||||
unsigned int pixel_step,
|
||||
unsigned int output_height,
|
||||
unsigned int output_width,
|
||||
const short *vp8_filter
|
||||
)
|
||||
{
|
||||
unsigned int i, j;
|
||||
int Temp;
|
||||
|
||||
for (i = 0; i < output_height; i++)
|
||||
{
|
||||
for (j = 0; j < output_width; j++)
|
||||
{
|
||||
Temp = ((int)src_ptr[-2 * (int)pixel_step] * vp8_filter[0]) +
|
||||
((int)src_ptr[-1 * (int)pixel_step] * vp8_filter[1]) +
|
||||
((int)src_ptr[0] * vp8_filter[2]) +
|
||||
((int)src_ptr[pixel_step] * vp8_filter[3]) +
|
||||
((int)src_ptr[2*pixel_step] * vp8_filter[4]) +
|
||||
((int)src_ptr[3*pixel_step] * vp8_filter[5]) +
|
||||
(VP8_FILTER_WEIGHT >> 1); /* Rounding */
|
||||
|
||||
/* Normalize back to 0-255 */
|
||||
Temp = Temp >> VP8_FILTER_SHIFT;
|
||||
|
||||
if (Temp < 0)
|
||||
Temp = 0;
|
||||
else if (Temp > 255)
|
||||
Temp = 255;
|
||||
|
||||
output_ptr[j] = Temp;
|
||||
src_ptr++;
|
||||
}
|
||||
|
||||
/* Next row... */
|
||||
src_ptr += src_pixels_per_line - output_width;
|
||||
output_ptr += output_width;
|
||||
}
|
||||
}
|
||||
|
||||
static void filter_block2d_second_pass
|
||||
(
|
||||
int *src_ptr,
|
||||
unsigned char *output_ptr,
|
||||
int output_pitch,
|
||||
unsigned int src_pixels_per_line,
|
||||
unsigned int pixel_step,
|
||||
unsigned int output_height,
|
||||
unsigned int output_width,
|
||||
const short *vp8_filter
|
||||
)
|
||||
{
|
||||
unsigned int i, j;
|
||||
int Temp;
|
||||
|
||||
for (i = 0; i < output_height; i++)
|
||||
{
|
||||
for (j = 0; j < output_width; j++)
|
||||
{
|
||||
/* Apply filter */
|
||||
Temp = ((int)src_ptr[-2 * (int)pixel_step] * vp8_filter[0]) +
|
||||
((int)src_ptr[-1 * (int)pixel_step] * vp8_filter[1]) +
|
||||
((int)src_ptr[0] * vp8_filter[2]) +
|
||||
((int)src_ptr[pixel_step] * vp8_filter[3]) +
|
||||
((int)src_ptr[2*pixel_step] * vp8_filter[4]) +
|
||||
((int)src_ptr[3*pixel_step] * vp8_filter[5]) +
|
||||
(VP8_FILTER_WEIGHT >> 1); /* Rounding */
|
||||
|
||||
/* Normalize back to 0-255 */
|
||||
Temp = Temp >> VP8_FILTER_SHIFT;
|
||||
|
||||
if (Temp < 0)
|
||||
Temp = 0;
|
||||
else if (Temp > 255)
|
||||
Temp = 255;
|
||||
|
||||
output_ptr[j] = (unsigned char)Temp;
|
||||
src_ptr++;
|
||||
}
|
||||
|
||||
/* Start next row */
|
||||
src_ptr += src_pixels_per_line - output_width;
|
||||
output_ptr += output_pitch;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
static void filter_block2d
|
||||
(
|
||||
unsigned char *src_ptr,
|
||||
unsigned char *output_ptr,
|
||||
unsigned int src_pixels_per_line,
|
||||
int output_pitch,
|
||||
const short *HFilter,
|
||||
const short *VFilter
|
||||
)
|
||||
{
|
||||
int FData[9*4]; /* Temp data buffer used in filtering */
|
||||
|
||||
/* First filter 1-D horizontally... */
|
||||
filter_block2d_first_pass(src_ptr - (2 * src_pixels_per_line), FData, src_pixels_per_line, 1, 9, 4, HFilter);
|
||||
|
||||
/* then filter vertically... */
|
||||
filter_block2d_second_pass(FData + 8, output_ptr, output_pitch, 4, 4, 4, 4, VFilter);
|
||||
}
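The 9-row scratch buffer above (and the 13- and 21-row buffers in the larger predictors below) follows from the 6-tap vertical filter needing 2 rows above and 3 rows below the block; a sketch of the sizing rule, illustrative only:

static int sixtap_intermediate_rows(int block_height)
{
    return block_height + 5;   /* 2 rows above + block + 3 rows below */
}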
|
||||
|
||||
|
||||
void vp8_sixtap_predict4x4_c
|
||||
(
|
||||
unsigned char *src_ptr,
|
||||
int src_pixels_per_line,
|
||||
int xoffset,
|
||||
int yoffset,
|
||||
unsigned char *dst_ptr,
|
||||
int dst_pitch
|
||||
)
|
||||
{
|
||||
const short *HFilter;
|
||||
const short *VFilter;
|
||||
|
||||
HFilter = vp8_sub_pel_filters[xoffset]; /* 6 tap */
|
||||
VFilter = vp8_sub_pel_filters[yoffset]; /* 6 tap */
|
||||
|
||||
filter_block2d(src_ptr, dst_ptr, src_pixels_per_line, dst_pitch, HFilter, VFilter);
|
||||
}
|
||||
void vp8_sixtap_predict8x8_c
|
||||
(
|
||||
unsigned char *src_ptr,
|
||||
int src_pixels_per_line,
|
||||
int xoffset,
|
||||
int yoffset,
|
||||
unsigned char *dst_ptr,
|
||||
int dst_pitch
|
||||
)
|
||||
{
|
||||
const short *HFilter;
|
||||
const short *VFilter;
|
||||
int FData[13*16]; /* Temp data buffer used in filtering */
|
||||
|
||||
HFilter = vp8_sub_pel_filters[xoffset]; /* 6 tap */
|
||||
VFilter = vp8_sub_pel_filters[yoffset]; /* 6 tap */
|
||||
|
||||
/* First filter 1-D horizontally... */
|
||||
filter_block2d_first_pass(src_ptr - (2 * src_pixels_per_line), FData, src_pixels_per_line, 1, 13, 8, HFilter);
|
||||
|
||||
|
||||
/* then filter vertically... */
|
||||
filter_block2d_second_pass(FData + 16, dst_ptr, dst_pitch, 8, 8, 8, 8, VFilter);
|
||||
|
||||
}
|
||||
|
||||
void vp8_sixtap_predict8x4_c
|
||||
(
|
||||
unsigned char *src_ptr,
|
||||
int src_pixels_per_line,
|
||||
int xoffset,
|
||||
int yoffset,
|
||||
unsigned char *dst_ptr,
|
||||
int dst_pitch
|
||||
)
|
||||
{
|
||||
const short *HFilter;
|
||||
const short *VFilter;
|
||||
int FData[13*16]; /* Temp data buffer used in filtering */
|
||||
|
||||
HFilter = vp8_sub_pel_filters[xoffset]; /* 6 tap */
|
||||
VFilter = vp8_sub_pel_filters[yoffset]; /* 6 tap */
|
||||
|
||||
/* First filter 1-D horizontally... */
|
||||
filter_block2d_first_pass(src_ptr - (2 * src_pixels_per_line), FData, src_pixels_per_line, 1, 9, 8, HFilter);
|
||||
|
||||
|
||||
/* then filter vertically... */
|
||||
filter_block2d_second_pass(FData + 16, dst_ptr, dst_pitch, 8, 8, 4, 8, VFilter);
|
||||
|
||||
}
|
||||
|
||||
void vp8_sixtap_predict16x16_c
|
||||
(
|
||||
unsigned char *src_ptr,
|
||||
int src_pixels_per_line,
|
||||
int xoffset,
|
||||
int yoffset,
|
||||
unsigned char *dst_ptr,
|
||||
int dst_pitch
|
||||
)
|
||||
{
|
||||
const short *HFilter;
|
||||
const short *VFilter;
|
||||
int FData[21*24]; /* Temp data buffer used in filtering */
|
||||
|
||||
|
||||
HFilter = vp8_sub_pel_filters[xoffset]; /* 6 tap */
|
||||
VFilter = vp8_sub_pel_filters[yoffset]; /* 6 tap */
|
||||
|
||||
/* First filter 1-D horizontally... */
|
||||
filter_block2d_first_pass(src_ptr - (2 * src_pixels_per_line), FData, src_pixels_per_line, 1, 21, 16, HFilter);
|
||||
|
||||
/* then filter vertically... */
|
||||
filter_block2d_second_pass(FData + 32, dst_ptr, dst_pitch, 16, 16, 16, 16, VFilter);
|
||||
|
||||
}
|
||||
|
||||
|
||||
/****************************************************************************
|
||||
*
|
||||
* ROUTINE : filter_block2d_bil_first_pass
|
||||
*
|
||||
* INPUTS : UINT8 *src_ptr : Pointer to source block.
|
||||
* UINT32 src_stride : Stride of source block.
|
||||
* UINT32 height : Block height.
|
||||
* UINT32 width : Block width.
|
||||
* INT32 *vp8_filter : Array of 2 bi-linear filter taps.
|
||||
*
|
||||
* OUTPUTS : INT32 *dst_ptr : Pointer to filtered block.
|
||||
*
|
||||
* RETURNS : void
|
||||
*
|
||||
* FUNCTION : Applies a 1-D 2-tap bi-linear filter to the source block
|
||||
* in the horizontal direction to produce the filtered output
|
||||
* block. Used to implement first-pass of 2-D separable filter.
|
||||
*
|
||||
* SPECIAL NOTES : Produces INT32 output to retain precision for next pass.
|
||||
* Two filter taps should sum to VP8_FILTER_WEIGHT.
|
||||
*
|
||||
****************************************************************************/
|
||||
static void filter_block2d_bil_first_pass
|
||||
(
|
||||
unsigned char *src_ptr,
|
||||
unsigned short *dst_ptr,
|
||||
unsigned int src_stride,
|
||||
unsigned int height,
|
||||
unsigned int width,
|
||||
const short *vp8_filter
|
||||
)
|
||||
{
|
||||
unsigned int i, j;
|
||||
|
||||
for (i = 0; i < height; i++)
|
||||
{
|
||||
for (j = 0; j < width; j++)
|
||||
{
|
||||
/* Apply bilinear filter */
|
||||
dst_ptr[j] = (((int)src_ptr[0] * vp8_filter[0]) +
|
||||
((int)src_ptr[1] * vp8_filter[1]) +
|
||||
(VP8_FILTER_WEIGHT / 2)) >> VP8_FILTER_SHIFT;
|
||||
src_ptr++;
|
||||
}
|
||||
|
||||
/* Next row... */
|
||||
src_ptr += src_stride - width;
|
||||
dst_ptr += width;
|
||||
}
|
||||
}
|
||||
|
||||
/****************************************************************************
|
||||
*
|
||||
* ROUTINE : filter_block2d_bil_second_pass
|
||||
*
|
||||
* INPUTS : INT32 *src_ptr : Pointer to source block.
|
||||
* UINT32 dst_pitch : Destination block pitch.
|
||||
* UINT32 height : Block height.
|
||||
* UINT32 width : Block width.
|
||||
* INT32 *vp8_filter : Array of 2 bi-linear filter taps.
|
||||
*
|
||||
* OUTPUTS : UINT16 *dst_ptr : Pointer to filtered block.
|
||||
*
|
||||
* RETURNS : void
|
||||
*
|
||||
* FUNCTION : Applies a 1-D 2-tap bi-linear filter to the source block
|
||||
* in the vertical direction to produce the filtered output
|
||||
* block. Used to implement second-pass of 2-D separable filter.
|
||||
*
|
||||
* SPECIAL NOTES : Requires 32-bit input as produced by filter_block2d_bil_first_pass.
|
||||
* Two filter taps should sum to VP8_FILTER_WEIGHT.
|
||||
*
|
||||
****************************************************************************/
|
||||
static void filter_block2d_bil_second_pass
|
||||
(
|
||||
unsigned short *src_ptr,
|
||||
unsigned char *dst_ptr,
|
||||
int dst_pitch,
|
||||
unsigned int height,
|
||||
unsigned int width,
|
||||
const short *vp8_filter
|
||||
)
|
||||
{
|
||||
unsigned int i, j;
|
||||
int Temp;
|
||||
|
||||
for (i = 0; i < height; i++)
|
||||
{
|
||||
for (j = 0; j < width; j++)
|
||||
{
|
||||
/* Apply filter */
|
||||
Temp = ((int)src_ptr[0] * vp8_filter[0]) +
|
||||
((int)src_ptr[width] * vp8_filter[1]) +
|
||||
(VP8_FILTER_WEIGHT / 2);
|
||||
dst_ptr[j] = (unsigned int)(Temp >> VP8_FILTER_SHIFT);
|
||||
src_ptr++;
|
||||
}
|
||||
|
||||
/* Next row... */
|
||||
dst_ptr += dst_pitch;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/****************************************************************************
|
||||
*
|
||||
* ROUTINE : filter_block2d_bil
|
||||
*
|
||||
* INPUTS : UINT8 *src_ptr : Pointer to source block.
|
||||
* UINT32 src_pitch : Stride of source block.
|
||||
* UINT32 dst_pitch : Stride of destination block.
|
||||
* INT32 *HFilter : Array of 2 horizontal filter taps.
|
||||
* INT32 *VFilter : Array of 2 vertical filter taps.
|
||||
* INT32 Width : Block width
|
||||
* INT32 Height : Block height
|
||||
*
|
||||
* OUTPUTS : UINT16 *dst_ptr : Pointer to filtered block.
|
||||
*
|
||||
* RETURNS : void
|
||||
*
|
||||
* FUNCTION : 2-D filters an input block by applying a 2-tap
|
||||
* bi-linear filter horizontally followed by a 2-tap
|
||||
* bi-linear filter vertically on the result.
|
||||
*
|
||||
* SPECIAL NOTES : The largest block size that can be handled here is 16x16
|
||||
*
|
||||
****************************************************************************/
|
||||
static void filter_block2d_bil
|
||||
(
|
||||
unsigned char *src_ptr,
|
||||
unsigned char *dst_ptr,
|
||||
unsigned int src_pitch,
|
||||
unsigned int dst_pitch,
|
||||
const short *HFilter,
|
||||
const short *VFilter,
|
||||
int Width,
|
||||
int Height
|
||||
)
|
||||
{
|
||||
|
||||
unsigned short FData[17*16]; /* Temp data buffer used in filtering */
|
||||
|
||||
/* First filter 1-D horizontally... */
|
||||
filter_block2d_bil_first_pass(src_ptr, FData, src_pitch, Height + 1, Width, HFilter);
|
||||
|
||||
/* then 1-D vertically... */
|
||||
filter_block2d_bil_second_pass(FData, dst_ptr, dst_pitch, Height, Width, VFilter);
|
||||
}
|
||||
|
||||
|
||||
void vp8_bilinear_predict4x4_c
|
||||
(
|
||||
unsigned char *src_ptr,
|
||||
int src_pixels_per_line,
|
||||
int xoffset,
|
||||
int yoffset,
|
||||
unsigned char *dst_ptr,
|
||||
int dst_pitch
|
||||
)
|
||||
{
|
||||
const short *HFilter;
|
||||
const short *VFilter;
|
||||
|
||||
HFilter = vp8_bilinear_filters[xoffset];
|
||||
VFilter = vp8_bilinear_filters[yoffset];
|
||||
#if 0
|
||||
{
|
||||
int i;
|
||||
unsigned char temp1[16];
|
||||
unsigned char temp2[16];
|
||||
|
||||
bilinear_predict4x4_mmx(src_ptr, src_pixels_per_line, xoffset, yoffset, temp1, 4);
|
||||
filter_block2d_bil(src_ptr, temp2, src_pixels_per_line, 4, HFilter, VFilter, 4, 4);
|
||||
|
||||
for (i = 0; i < 16; i++)
|
||||
{
|
||||
if (temp1[i] != temp2[i])
|
||||
{
|
||||
bilinear_predict4x4_mmx(src_ptr, src_pixels_per_line, xoffset, yoffset, temp1, 4);
|
||||
filter_block2d_bil(src_ptr, temp2, src_pixels_per_line, 4, HFilter, VFilter, 4, 4);
|
||||
}
|
||||
}
|
||||
}
|
||||
#endif
|
||||
filter_block2d_bil(src_ptr, dst_ptr, src_pixels_per_line, dst_pitch, HFilter, VFilter, 4, 4);
|
||||
|
||||
}
|
||||
|
||||
void vp8_bilinear_predict8x8_c
|
||||
(
|
||||
unsigned char *src_ptr,
|
||||
int src_pixels_per_line,
|
||||
int xoffset,
|
||||
int yoffset,
|
||||
unsigned char *dst_ptr,
|
||||
int dst_pitch
|
||||
)
|
||||
{
|
||||
const short *HFilter;
|
||||
const short *VFilter;
|
||||
|
||||
HFilter = vp8_bilinear_filters[xoffset];
|
||||
VFilter = vp8_bilinear_filters[yoffset];
|
||||
|
||||
filter_block2d_bil(src_ptr, dst_ptr, src_pixels_per_line, dst_pitch, HFilter, VFilter, 8, 8);
|
||||
|
||||
}
|
||||
|
||||
void vp8_bilinear_predict8x4_c
|
||||
(
|
||||
unsigned char *src_ptr,
|
||||
int src_pixels_per_line,
|
||||
int xoffset,
|
||||
int yoffset,
|
||||
unsigned char *dst_ptr,
|
||||
int dst_pitch
|
||||
)
|
||||
{
|
||||
const short *HFilter;
|
||||
const short *VFilter;
|
||||
|
||||
HFilter = vp8_bilinear_filters[xoffset];
|
||||
VFilter = vp8_bilinear_filters[yoffset];
|
||||
|
||||
filter_block2d_bil(src_ptr, dst_ptr, src_pixels_per_line, dst_pitch, HFilter, VFilter, 8, 4);
|
||||
|
||||
}
|
||||
|
||||
void vp8_bilinear_predict16x16_c
|
||||
(
|
||||
unsigned char *src_ptr,
|
||||
int src_pixels_per_line,
|
||||
int xoffset,
|
||||
int yoffset,
|
||||
unsigned char *dst_ptr,
|
||||
int dst_pitch
|
||||
)
|
||||
{
|
||||
const short *HFilter;
|
||||
const short *VFilter;
|
||||
|
||||
HFilter = vp8_bilinear_filters[xoffset];
|
||||
VFilter = vp8_bilinear_filters[yoffset];
|
||||
|
||||
filter_block2d_bil(src_ptr, dst_ptr, src_pixels_per_line, dst_pitch, HFilter, VFilter, 16, 16);
|
||||
}
|
||||
@@ -1,32 +0,0 @@
/*
 *  Copyright (c) 2011 The WebM project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS. All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */


#ifndef VP8_COMMON_FILTER_H_
#define VP8_COMMON_FILTER_H_

#include "vpx_ports/mem.h"

#ifdef __cplusplus
extern "C" {
#endif

#define BLOCK_HEIGHT_WIDTH 4
#define VP8_FILTER_WEIGHT 128
#define VP8_FILTER_SHIFT  7

extern DECLARE_ALIGNED(16, const short, vp8_bilinear_filters[8][2]);
extern DECLARE_ALIGNED(16, const short, vp8_sub_pel_filters[8][6]);

#ifdef __cplusplus
}  // extern "C"
#endif

#endif  // VP8_COMMON_FILTER_H_
@@ -1,193 +0,0 @@
|
||||
/*
|
||||
* Copyright (c) 2010 The WebM project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
|
||||
#include "findnearmv.h"
|
||||
|
||||
const unsigned char vp8_mbsplit_offset[4][16] = {
|
||||
{ 0, 8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
|
||||
{ 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
|
||||
{ 0, 2, 8, 10, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
|
||||
{ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}
|
||||
};
|
||||
|
||||
/* Predict motion vectors using those from already-decoded nearby blocks.
|
||||
Note that we only consider one 4x4 subblock from each candidate 16x16
|
||||
macroblock. */
|
||||
void vp8_find_near_mvs
|
||||
(
|
||||
MACROBLOCKD *xd,
|
||||
const MODE_INFO *here,
|
||||
int_mv *nearest,
|
||||
int_mv *nearby,
|
||||
int_mv *best_mv,
|
||||
int cnt[4],
|
||||
int refframe,
|
||||
int *ref_frame_sign_bias
|
||||
)
|
||||
{
|
||||
const MODE_INFO *above = here - xd->mode_info_stride;
|
||||
const MODE_INFO *left = here - 1;
|
||||
const MODE_INFO *aboveleft = above - 1;
|
||||
int_mv near_mvs[4];
|
||||
int_mv *mv = near_mvs;
|
||||
int *cntx = cnt;
|
||||
enum {CNT_INTRA, CNT_NEAREST, CNT_NEAR, CNT_SPLITMV};
|
||||
|
||||
/* Zero accumulators */
|
||||
mv[0].as_int = mv[1].as_int = mv[2].as_int = 0;
|
||||
cnt[0] = cnt[1] = cnt[2] = cnt[3] = 0;
|
||||
|
||||
/* Process above */
|
||||
if (above->mbmi.ref_frame != INTRA_FRAME)
|
||||
{
|
||||
if (above->mbmi.mv.as_int)
|
||||
{
|
||||
(++mv)->as_int = above->mbmi.mv.as_int;
|
||||
mv_bias(ref_frame_sign_bias[above->mbmi.ref_frame], refframe, mv, ref_frame_sign_bias);
|
||||
++cntx;
|
||||
}
|
||||
|
||||
*cntx += 2;
|
||||
}
|
||||
|
||||
/* Process left */
|
||||
if (left->mbmi.ref_frame != INTRA_FRAME)
|
||||
{
|
||||
if (left->mbmi.mv.as_int)
|
||||
{
|
||||
int_mv this_mv;
|
||||
|
||||
this_mv.as_int = left->mbmi.mv.as_int;
|
||||
mv_bias(ref_frame_sign_bias[left->mbmi.ref_frame], refframe, &this_mv, ref_frame_sign_bias);
|
||||
|
||||
if (this_mv.as_int != mv->as_int)
|
||||
{
|
||||
(++mv)->as_int = this_mv.as_int;
|
||||
++cntx;
|
||||
}
|
||||
|
||||
*cntx += 2;
|
||||
}
|
||||
else
|
||||
cnt[CNT_INTRA] += 2;
|
||||
}
|
||||
|
||||
/* Process above left */
|
||||
if (aboveleft->mbmi.ref_frame != INTRA_FRAME)
|
||||
{
|
||||
if (aboveleft->mbmi.mv.as_int)
|
||||
{
|
||||
int_mv this_mv;
|
||||
|
||||
this_mv.as_int = aboveleft->mbmi.mv.as_int;
|
||||
mv_bias(ref_frame_sign_bias[aboveleft->mbmi.ref_frame], refframe, &this_mv, ref_frame_sign_bias);
|
||||
|
||||
if (this_mv.as_int != mv->as_int)
|
||||
{
|
||||
(++mv)->as_int = this_mv.as_int;
|
||||
++cntx;
|
||||
}
|
||||
|
||||
*cntx += 1;
|
||||
}
|
||||
else
|
||||
cnt[CNT_INTRA] += 1;
|
||||
}
|
||||
|
||||
/* If we have three distinct MV's ... */
|
||||
if (cnt[CNT_SPLITMV])
|
||||
{
|
||||
/* See if above-left MV can be merged with NEAREST */
|
||||
if (mv->as_int == near_mvs[CNT_NEAREST].as_int)
|
||||
cnt[CNT_NEAREST] += 1;
|
||||
}
|
||||
|
||||
cnt[CNT_SPLITMV] = ((above->mbmi.mode == SPLITMV)
|
||||
+ (left->mbmi.mode == SPLITMV)) * 2
|
||||
+ (aboveleft->mbmi.mode == SPLITMV);
|
||||
|
||||
/* Swap near and nearest if necessary */
|
||||
if (cnt[CNT_NEAR] > cnt[CNT_NEAREST])
|
||||
{
|
||||
int tmp;
|
||||
tmp = cnt[CNT_NEAREST];
|
||||
cnt[CNT_NEAREST] = cnt[CNT_NEAR];
|
||||
cnt[CNT_NEAR] = tmp;
|
||||
tmp = near_mvs[CNT_NEAREST].as_int;
|
||||
near_mvs[CNT_NEAREST].as_int = near_mvs[CNT_NEAR].as_int;
|
||||
near_mvs[CNT_NEAR].as_int = tmp;
|
||||
}
|
||||
|
||||
/* Use near_mvs[0] to store the "best" MV */
|
||||
if (cnt[CNT_NEAREST] >= cnt[CNT_INTRA])
|
||||
near_mvs[CNT_INTRA] = near_mvs[CNT_NEAREST];
|
||||
|
||||
/* Set up return values */
|
||||
best_mv->as_int = near_mvs[0].as_int;
|
||||
nearest->as_int = near_mvs[CNT_NEAREST].as_int;
|
||||
nearby->as_int = near_mvs[CNT_NEAR].as_int;
|
||||
}
|
||||
|
||||
|
||||
static void invert_and_clamp_mvs(int_mv *inv, int_mv *src, MACROBLOCKD *xd)
|
||||
{
|
||||
inv->as_mv.row = src->as_mv.row * -1;
|
||||
inv->as_mv.col = src->as_mv.col * -1;
|
||||
vp8_clamp_mv2(inv, xd);
|
||||
vp8_clamp_mv2(src, xd);
|
||||
}
|
||||
|
||||
|
||||
int vp8_find_near_mvs_bias
|
||||
(
|
||||
MACROBLOCKD *xd,
|
||||
const MODE_INFO *here,
|
||||
int_mv mode_mv_sb[2][MB_MODE_COUNT],
|
||||
int_mv best_mv_sb[2],
|
||||
int cnt[4],
|
||||
int refframe,
|
||||
int *ref_frame_sign_bias
|
||||
)
|
||||
{
|
||||
int sign_bias = ref_frame_sign_bias[refframe];
|
||||
|
||||
vp8_find_near_mvs(xd,
|
||||
here,
|
||||
&mode_mv_sb[sign_bias][NEARESTMV],
|
||||
&mode_mv_sb[sign_bias][NEARMV],
|
||||
&best_mv_sb[sign_bias],
|
||||
cnt,
|
||||
refframe,
|
||||
ref_frame_sign_bias);
|
||||
|
||||
invert_and_clamp_mvs(&mode_mv_sb[!sign_bias][NEARESTMV],
|
||||
&mode_mv_sb[sign_bias][NEARESTMV], xd);
|
||||
invert_and_clamp_mvs(&mode_mv_sb[!sign_bias][NEARMV],
|
||||
&mode_mv_sb[sign_bias][NEARMV], xd);
|
||||
invert_and_clamp_mvs(&best_mv_sb[!sign_bias],
|
||||
&best_mv_sb[sign_bias], xd);
|
||||
|
||||
return sign_bias;
|
||||
}
|
||||
|
||||
|
||||
vp8_prob *vp8_mv_ref_probs(
|
||||
vp8_prob p[VP8_MVREFS-1], const int near_mv_ref_ct[4]
|
||||
)
|
||||
{
|
||||
p[0] = vp8_mode_contexts [near_mv_ref_ct[0]] [0];
|
||||
p[1] = vp8_mode_contexts [near_mv_ref_ct[1]] [1];
|
||||
p[2] = vp8_mode_contexts [near_mv_ref_ct[2]] [2];
|
||||
p[3] = vp8_mode_contexts [near_mv_ref_ct[3]] [3];
|
||||
/*p[3] = vp8_mode_contexts [near_mv_ref_ct[1] + near_mv_ref_ct[2] + near_mv_ref_ct[3]] [3];*/
|
||||
return p;
|
||||
}
|
||||
|
||||
@@ -1,195 +0,0 @@
|
||||
/*
|
||||
* Copyright (c) 2010 The WebM project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
|
||||
#ifndef VP8_COMMON_FINDNEARMV_H_
|
||||
#define VP8_COMMON_FINDNEARMV_H_
|
||||
|
||||
#include "./vpx_config.h"
|
||||
#include "mv.h"
|
||||
#include "blockd.h"
|
||||
#include "modecont.h"
|
||||
#include "treecoder.h"
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
|
||||
static INLINE void mv_bias(int refmb_ref_frame_sign_bias, int refframe,
|
||||
int_mv *mvp, const int *ref_frame_sign_bias)
|
||||
{
|
||||
if (refmb_ref_frame_sign_bias != ref_frame_sign_bias[refframe])
|
||||
{
|
||||
mvp->as_mv.row *= -1;
|
||||
mvp->as_mv.col *= -1;
|
||||
}
|
||||
}
|
||||
|
||||
#define LEFT_TOP_MARGIN (16 << 3)
|
||||
#define RIGHT_BOTTOM_MARGIN (16 << 3)
|
||||
static INLINE void vp8_clamp_mv2(int_mv *mv, const MACROBLOCKD *xd)
|
||||
{
|
||||
if (mv->as_mv.col < (xd->mb_to_left_edge - LEFT_TOP_MARGIN))
|
||||
mv->as_mv.col = xd->mb_to_left_edge - LEFT_TOP_MARGIN;
|
||||
else if (mv->as_mv.col > xd->mb_to_right_edge + RIGHT_BOTTOM_MARGIN)
|
||||
mv->as_mv.col = xd->mb_to_right_edge + RIGHT_BOTTOM_MARGIN;
|
||||
|
||||
if (mv->as_mv.row < (xd->mb_to_top_edge - LEFT_TOP_MARGIN))
|
||||
mv->as_mv.row = xd->mb_to_top_edge - LEFT_TOP_MARGIN;
|
||||
else if (mv->as_mv.row > xd->mb_to_bottom_edge + RIGHT_BOTTOM_MARGIN)
|
||||
mv->as_mv.row = xd->mb_to_bottom_edge + RIGHT_BOTTOM_MARGIN;
|
||||
}
|
||||
|
||||
static INLINE void vp8_clamp_mv(int_mv *mv, int mb_to_left_edge,
|
||||
int mb_to_right_edge, int mb_to_top_edge,
|
||||
int mb_to_bottom_edge)
|
||||
{
|
||||
mv->as_mv.col = (mv->as_mv.col < mb_to_left_edge) ?
|
||||
mb_to_left_edge : mv->as_mv.col;
|
||||
mv->as_mv.col = (mv->as_mv.col > mb_to_right_edge) ?
|
||||
mb_to_right_edge : mv->as_mv.col;
|
||||
mv->as_mv.row = (mv->as_mv.row < mb_to_top_edge) ?
|
||||
mb_to_top_edge : mv->as_mv.row;
|
||||
mv->as_mv.row = (mv->as_mv.row > mb_to_bottom_edge) ?
|
||||
mb_to_bottom_edge : mv->as_mv.row;
|
||||
}
|
||||
static INLINE unsigned int vp8_check_mv_bounds(int_mv *mv, int mb_to_left_edge,
|
||||
int mb_to_right_edge,
|
||||
int mb_to_top_edge,
|
||||
int mb_to_bottom_edge)
|
||||
{
|
||||
unsigned int need_to_clamp;
|
||||
need_to_clamp = (mv->as_mv.col < mb_to_left_edge);
|
||||
need_to_clamp |= (mv->as_mv.col > mb_to_right_edge);
|
||||
need_to_clamp |= (mv->as_mv.row < mb_to_top_edge);
|
||||
need_to_clamp |= (mv->as_mv.row > mb_to_bottom_edge);
|
||||
return need_to_clamp;
|
||||
}
|
||||
|
||||
void vp8_find_near_mvs
|
||||
(
|
||||
MACROBLOCKD *xd,
|
||||
const MODE_INFO *here,
|
||||
int_mv *nearest, int_mv *nearby, int_mv *best,
|
||||
int near_mv_ref_cts[4],
|
||||
int refframe,
|
||||
int *ref_frame_sign_bias
|
||||
);
|
||||
|
||||
|
||||
int vp8_find_near_mvs_bias
|
||||
(
|
||||
MACROBLOCKD *xd,
|
||||
const MODE_INFO *here,
|
||||
int_mv mode_mv_sb[2][MB_MODE_COUNT],
|
||||
int_mv best_mv_sb[2],
|
||||
int cnt[4],
|
||||
int refframe,
|
||||
int *ref_frame_sign_bias
|
||||
);
|
||||
|
||||
|
||||
vp8_prob *vp8_mv_ref_probs(
|
||||
vp8_prob p[VP8_MVREFS-1], const int near_mv_ref_ct[4]
|
||||
);
|
||||
|
||||
extern const unsigned char vp8_mbsplit_offset[4][16];
|
||||
|
||||
|
||||
static INLINE uint32_t left_block_mv(const MODE_INFO *cur_mb, int b)
|
||||
{
|
||||
if (!(b & 3))
|
||||
{
|
||||
/* On L edge, get from MB to left of us */
|
||||
--cur_mb;
|
||||
|
||||
if(cur_mb->mbmi.mode != SPLITMV)
|
||||
return cur_mb->mbmi.mv.as_int;
|
||||
b += 4;
|
||||
}
|
||||
|
||||
return (cur_mb->bmi + b - 1)->mv.as_int;
|
||||
}
|
||||
|
||||
static INLINE uint32_t above_block_mv(const MODE_INFO *cur_mb, int b,
|
||||
int mi_stride)
|
||||
{
|
||||
if (!(b >> 2))
|
||||
{
|
||||
/* On top edge, get from MB above us */
|
||||
cur_mb -= mi_stride;
|
||||
|
||||
if(cur_mb->mbmi.mode != SPLITMV)
|
||||
return cur_mb->mbmi.mv.as_int;
|
||||
b += 16;
|
||||
}
|
||||
|
||||
return (cur_mb->bmi + (b - 4))->mv.as_int;
|
||||
}
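The edge tests above rely on the 16 subblocks being indexed in raster order; in other words (illustrative helpers, not from the original header):

static int subblock_row(int b) { return b >> 2; }   /* 0..3, top to bottom */
static int subblock_col(int b) { return b & 3; }    /* 0..3, left to right */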
|
||||
static INLINE B_PREDICTION_MODE left_block_mode(const MODE_INFO *cur_mb, int b)
|
||||
{
|
||||
if (!(b & 3))
|
||||
{
|
||||
/* On L edge, get from MB to left of us */
|
||||
--cur_mb;
|
||||
switch (cur_mb->mbmi.mode)
|
||||
{
|
||||
case B_PRED:
|
||||
return (cur_mb->bmi + b + 3)->as_mode;
|
||||
case DC_PRED:
|
||||
return B_DC_PRED;
|
||||
case V_PRED:
|
||||
return B_VE_PRED;
|
||||
case H_PRED:
|
||||
return B_HE_PRED;
|
||||
case TM_PRED:
|
||||
return B_TM_PRED;
|
||||
default:
|
||||
return B_DC_PRED;
|
||||
}
|
||||
}
|
||||
|
||||
return (cur_mb->bmi + b - 1)->as_mode;
|
||||
}
|
||||
|
||||
static INLINE B_PREDICTION_MODE above_block_mode(const MODE_INFO *cur_mb, int b,
|
||||
int mi_stride)
|
||||
{
|
||||
if (!(b >> 2))
|
||||
{
|
||||
/* On top edge, get from MB above us */
|
||||
cur_mb -= mi_stride;
|
||||
|
||||
switch (cur_mb->mbmi.mode)
|
||||
{
|
||||
case B_PRED:
|
||||
return (cur_mb->bmi + b + 12)->as_mode;
|
||||
case DC_PRED:
|
||||
return B_DC_PRED;
|
||||
case V_PRED:
|
||||
return B_VE_PRED;
|
||||
case H_PRED:
|
||||
return B_HE_PRED;
|
||||
case TM_PRED:
|
||||
return B_TM_PRED;
|
||||
default:
|
||||
return B_DC_PRED;
|
||||
}
|
||||
}
|
||||
|
||||
return (cur_mb->bmi + b - 4)->as_mode;
|
||||
}
|
||||
|
||||
#ifdef __cplusplus
|
||||
} // extern "C"
|
||||
#endif
|
||||
|
||||
#endif // VP8_COMMON_FINDNEARMV_H_
|
||||
@@ -1,104 +0,0 @@
|
||||
/*
|
||||
* Copyright (c) 2010 The WebM project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
|
||||
#include "vpx_config.h"
|
||||
#include "vp8_rtcd.h"
|
||||
#if ARCH_ARM
|
||||
#include "vpx_ports/arm.h"
|
||||
#elif ARCH_X86 || ARCH_X86_64
|
||||
#include "vpx_ports/x86.h"
|
||||
#endif
|
||||
#include "vp8/common/onyxc_int.h"
|
||||
#include "vp8/common/systemdependent.h"
|
||||
|
||||
#if CONFIG_MULTITHREAD
|
||||
#if HAVE_UNISTD_H && !defined(__OS2__)
|
||||
#include <unistd.h>
|
||||
#elif defined(_WIN32)
|
||||
#include <windows.h>
|
||||
typedef void (WINAPI *PGNSI)(LPSYSTEM_INFO);
|
||||
#elif defined(__OS2__)
|
||||
#define INCL_DOS
|
||||
#define INCL_DOSSPINLOCK
|
||||
#include <os2.h>
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#if CONFIG_MULTITHREAD
|
||||
static int get_cpu_count()
|
||||
{
|
||||
int core_count = 16;
|
||||
|
||||
#if HAVE_UNISTD_H && !defined(__OS2__)
|
||||
#if defined(_SC_NPROCESSORS_ONLN)
|
||||
core_count = sysconf(_SC_NPROCESSORS_ONLN);
|
||||
#elif defined(_SC_NPROC_ONLN)
|
||||
core_count = sysconf(_SC_NPROC_ONLN);
|
||||
#endif
|
||||
#elif defined(_WIN32)
|
||||
{
|
||||
#if _WIN32_WINNT >= 0x0501
|
||||
SYSTEM_INFO sysinfo;
|
||||
GetNativeSystemInfo(&sysinfo);
|
||||
#else
|
||||
PGNSI pGNSI;
|
||||
SYSTEM_INFO sysinfo;
|
||||
|
||||
/* Call GetNativeSystemInfo if supported or
|
||||
* GetSystemInfo otherwise. */
|
||||
|
||||
pGNSI = (PGNSI) GetProcAddress(
|
||||
GetModuleHandle(TEXT("kernel32.dll")), "GetNativeSystemInfo");
|
||||
if (pGNSI != NULL)
|
||||
pGNSI(&sysinfo);
|
||||
else
|
||||
GetSystemInfo(&sysinfo);
|
||||
#endif
|
||||
|
||||
core_count = sysinfo.dwNumberOfProcessors;
|
||||
}
|
||||
#elif defined(__OS2__)
|
||||
{
|
||||
ULONG proc_id;
|
||||
ULONG status;
|
||||
|
||||
core_count = 0;
|
||||
for (proc_id = 1; ; proc_id++)
|
||||
{
|
||||
if (DosGetProcessorStatus(proc_id, &status))
|
||||
break;
|
||||
|
||||
if (status == PROC_ONLINE)
|
||||
core_count++;
|
||||
}
|
||||
}
|
||||
#else
|
||||
/* other platforms */
|
||||
#endif
|
||||
|
||||
return core_count > 0 ? core_count : 1;
|
||||
}
|
||||
#endif
|
||||
|
||||
void vp8_clear_system_state_c() {};
|
||||
|
||||
void vp8_machine_specific_config(VP8_COMMON *ctx)
|
||||
{
|
||||
#if CONFIG_MULTITHREAD
|
||||
ctx->processor_core_count = get_cpu_count();
|
||||
#endif /* CONFIG_MULTITHREAD */
|
||||
|
||||
#if ARCH_ARM
|
||||
ctx->cpu_caps = arm_cpu_caps();
|
||||
#elif ARCH_X86 || ARCH_X86_64
|
||||
ctx->cpu_caps = x86_simd_caps();
|
||||
#endif
|
||||
}
|
||||
@@ -1,51 +0,0 @@
|
||||
/*
|
||||
* Copyright (c) 2010 The WebM project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
|
||||
#ifndef VP8_COMMON_HEADER_H_
|
||||
#define VP8_COMMON_HEADER_H_
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
/* 24 bits total */
|
||||
typedef struct
|
||||
{
|
||||
unsigned int type: 1;
|
||||
unsigned int version: 3;
|
||||
unsigned int show_frame: 1;
|
||||
|
||||
/* Allow 2^20 bytes = 8 megabits for first partition */
|
||||
|
||||
unsigned int first_partition_length_in_bytes: 19;
|
||||
|
||||
#ifdef PACKET_TESTING
|
||||
unsigned int frame_number;
|
||||
unsigned int update_gold: 1;
|
||||
unsigned int uses_gold: 1;
|
||||
unsigned int update_last: 1;
|
||||
unsigned int uses_last: 1;
|
||||
#endif
|
||||
|
||||
} VP8_HEADER;
|
||||
|
||||
#ifdef PACKET_TESTING
|
||||
#define VP8_HEADER_SIZE 8
|
||||
#else
|
||||
#define VP8_HEADER_SIZE 3
|
||||
#endif
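The bitfields above account for all of the 24 bits noted in the struct comment, which is why the packed header is 3 bytes outside PACKET_TESTING builds; a trivial editorial check (hypothetical function name):

static int uncompressed_header_bits(void)
{
    return 1 + 3 + 1 + 19;   /* type + version + show_frame + first partition size = 24 */
}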
|
||||
|
||||
|
||||
#ifdef __cplusplus
|
||||
} // extern "C"
|
||||
#endif
|
||||
|
||||
#endif // VP8_COMMON_HEADER_H_
|
||||
@@ -1,90 +0,0 @@
|
||||
/*
|
||||
* Copyright (c) 2010 The WebM project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#include "vpx_config.h"
|
||||
#include "vp8_rtcd.h"
|
||||
#include "vpx_mem/vpx_mem.h"
|
||||
|
||||
void vp8_dequant_idct_add_c(short *input, short *dq,
|
||||
unsigned char *dest, int stride);
|
||||
void vp8_dc_only_idct_add_c(short input_dc, unsigned char * pred,
|
||||
int pred_stride, unsigned char *dst_ptr,
|
||||
int dst_stride);
|
||||
|
||||
void vp8_dequant_idct_add_y_block_c
|
||||
(short *q, short *dq,
|
||||
unsigned char *dst, int stride, char *eobs)
|
||||
{
|
||||
int i, j;
|
||||
|
||||
for (i = 0; i < 4; i++)
|
||||
{
|
||||
for (j = 0; j < 4; j++)
|
||||
{
|
||||
if (*eobs++ > 1)
|
||||
vp8_dequant_idct_add_c (q, dq, dst, stride);
|
||||
else
|
||||
{
|
||||
vp8_dc_only_idct_add_c (q[0]*dq[0], dst, stride, dst, stride);
|
||||
memset(q, 0, 2 * sizeof(q[0]));
|
||||
}
|
||||
|
||||
q += 16;
|
||||
dst += 4;
|
||||
}
|
||||
|
||||
dst += 4*stride - 16;
|
||||
}
|
||||
}
|
||||
|
||||
void vp8_dequant_idct_add_uv_block_c
|
||||
(short *q, short *dq,
|
||||
unsigned char *dstu, unsigned char *dstv, int stride, char *eobs)
|
||||
{
|
||||
int i, j;
|
||||
|
||||
for (i = 0; i < 2; i++)
|
||||
{
|
||||
for (j = 0; j < 2; j++)
|
||||
{
|
||||
if (*eobs++ > 1)
|
||||
vp8_dequant_idct_add_c (q, dq, dstu, stride);
|
||||
else
|
||||
{
|
||||
vp8_dc_only_idct_add_c (q[0]*dq[0], dstu, stride, dstu, stride);
|
||||
memset(q, 0, 2 * sizeof(q[0]));
|
||||
}
|
||||
|
||||
q += 16;
|
||||
dstu += 4;
|
||||
}
|
||||
|
||||
dstu += 4*stride - 8;
|
||||
}
|
||||
|
||||
for (i = 0; i < 2; i++)
|
||||
{
|
||||
for (j = 0; j < 2; j++)
|
||||
{
|
||||
if (*eobs++ > 1)
|
||||
vp8_dequant_idct_add_c (q, dq, dstv, stride);
|
||||
else
|
||||
{
|
||||
vp8_dc_only_idct_add_c (q[0]*dq[0], dstv, stride, dstv, stride);
|
||||
memset(q, 0, 2 * sizeof(q[0]));
|
||||
}
|
||||
|
||||
q += 16;
|
||||
dstv += 4;
|
||||
}
|
||||
|
||||
dstv += 4*stride - 8;
|
||||
}
|
||||
}
|
||||
@@ -1,205 +0,0 @@
|
||||
/*
|
||||
* Copyright (c) 2010 The WebM project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#include "./vp8_rtcd.h"
|
||||
|
||||
/****************************************************************************
|
||||
* Notes:
|
||||
*
|
||||
* This implementation makes use of 16 bit fixed point verio of two multiply
|
||||
* constants:
|
||||
* 1. sqrt(2) * cos (pi/8)
|
||||
* 2. sqrt(2) * sin (pi/8)
|
||||
* Becuase the first constant is bigger than 1, to maintain the same 16 bit
|
||||
* fixed point precision as the second one, we use a trick of
|
||||
* x * a = x + x*(a-1)
|
||||
* so
|
||||
* x * sqrt(2) * cos (pi/8) = x + x * (sqrt(2) *cos(pi/8)-1).
|
||||
**************************************************************************/
|
||||
static const int cospi8sqrt2minus1 = 20091;
|
||||
static const int sinpi8sqrt2 = 35468;
|
||||
|
||||
void vp8_short_idct4x4llm_c(short *input, unsigned char *pred_ptr,
|
||||
int pred_stride, unsigned char *dst_ptr,
|
||||
int dst_stride)
|
||||
{
|
||||
int i;
|
||||
int r, c;
|
||||
int a1, b1, c1, d1;
|
||||
short output[16];
|
||||
short *ip = input;
|
||||
short *op = output;
|
||||
int temp1, temp2;
|
||||
int shortpitch = 4;
|
||||
|
||||
for (i = 0; i < 4; i++)
|
||||
{
|
||||
a1 = ip[0] + ip[8];
|
||||
b1 = ip[0] - ip[8];
|
||||
|
||||
temp1 = (ip[4] * sinpi8sqrt2) >> 16;
|
||||
temp2 = ip[12] + ((ip[12] * cospi8sqrt2minus1) >> 16);
|
||||
c1 = temp1 - temp2;
|
||||
|
||||
temp1 = ip[4] + ((ip[4] * cospi8sqrt2minus1) >> 16);
|
||||
temp2 = (ip[12] * sinpi8sqrt2) >> 16;
|
||||
d1 = temp1 + temp2;
|
||||
|
||||
op[shortpitch*0] = a1 + d1;
|
||||
op[shortpitch*3] = a1 - d1;
|
||||
|
||||
op[shortpitch*1] = b1 + c1;
|
||||
op[shortpitch*2] = b1 - c1;
|
||||
|
||||
ip++;
|
||||
op++;
|
||||
}
|
||||
|
||||
ip = output;
|
||||
op = output;
|
||||
|
||||
for (i = 0; i < 4; i++)
|
||||
{
|
||||
a1 = ip[0] + ip[2];
|
||||
b1 = ip[0] - ip[2];
|
||||
|
||||
temp1 = (ip[1] * sinpi8sqrt2) >> 16;
|
||||
temp2 = ip[3] + ((ip[3] * cospi8sqrt2minus1) >> 16);
|
||||
c1 = temp1 - temp2;
|
||||
|
||||
temp1 = ip[1] + ((ip[1] * cospi8sqrt2minus1) >> 16);
|
||||
temp2 = (ip[3] * sinpi8sqrt2) >> 16;
|
||||
d1 = temp1 + temp2;
|
||||
|
||||
|
||||
op[0] = (a1 + d1 + 4) >> 3;
|
||||
op[3] = (a1 - d1 + 4) >> 3;
|
||||
|
||||
op[1] = (b1 + c1 + 4) >> 3;
|
||||
op[2] = (b1 - c1 + 4) >> 3;
|
||||
|
||||
ip += shortpitch;
|
||||
op += shortpitch;
|
||||
}
|
||||
|
||||
ip = output;
|
||||
for (r = 0; r < 4; r++)
|
||||
{
|
||||
for (c = 0; c < 4; c++)
|
||||
{
|
||||
int a = ip[c] + pred_ptr[c] ;
|
||||
|
||||
if (a < 0)
|
||||
a = 0;
|
||||
|
||||
if (a > 255)
|
||||
a = 255;
|
||||
|
||||
dst_ptr[c] = (unsigned char) a ;
|
||||
}
|
||||
ip += 4;
|
||||
dst_ptr += dst_stride;
|
||||
pred_ptr += pred_stride;
|
||||
}
|
||||
}
|
||||
|
||||
void vp8_dc_only_idct_add_c(short input_dc, unsigned char *pred_ptr,
                            int pred_stride, unsigned char *dst_ptr,
                            int dst_stride)
{
    int a1 = ((input_dc + 4) >> 3);
    int r, c;

    for (r = 0; r < 4; r++)
    {
        for (c = 0; c < 4; c++)
        {
            int a = a1 + pred_ptr[c] ;

            if (a < 0)
                a = 0;

            if (a > 255)
                a = 255;

            dst_ptr[c] = (unsigned char) a ;
        }

        dst_ptr += dst_stride;
        pred_ptr += pred_stride;
    }

}
|
||||
|
||||
void vp8_short_inv_walsh4x4_c(short *input, short *mb_dqcoeff)
|
||||
{
|
||||
short output[16];
|
||||
int i;
|
||||
int a1, b1, c1, d1;
|
||||
int a2, b2, c2, d2;
|
||||
short *ip = input;
|
||||
short *op = output;
|
||||
|
||||
for (i = 0; i < 4; i++)
|
||||
{
|
||||
a1 = ip[0] + ip[12];
|
||||
b1 = ip[4] + ip[8];
|
||||
c1 = ip[4] - ip[8];
|
||||
d1 = ip[0] - ip[12];
|
||||
|
||||
op[0] = a1 + b1;
|
||||
op[4] = c1 + d1;
|
||||
op[8] = a1 - b1;
|
||||
op[12] = d1 - c1;
|
||||
ip++;
|
||||
op++;
|
||||
}
|
||||
|
||||
ip = output;
|
||||
op = output;
|
||||
|
||||
for (i = 0; i < 4; i++)
|
||||
{
|
||||
a1 = ip[0] + ip[3];
|
||||
b1 = ip[1] + ip[2];
|
||||
c1 = ip[1] - ip[2];
|
||||
d1 = ip[0] - ip[3];
|
||||
|
||||
a2 = a1 + b1;
|
||||
b2 = c1 + d1;
|
||||
c2 = a1 - b1;
|
||||
d2 = d1 - c1;
|
||||
|
||||
op[0] = (a2 + 3) >> 3;
|
||||
op[1] = (b2 + 3) >> 3;
|
||||
op[2] = (c2 + 3) >> 3;
|
||||
op[3] = (d2 + 3) >> 3;
|
||||
|
||||
ip += 4;
|
||||
op += 4;
|
||||
}
|
||||
|
||||
for(i = 0; i < 16; i++)
|
||||
{
|
||||
mb_dqcoeff[i * 16] = output[i];
|
||||
}
|
||||
}
|
||||
|
||||
void vp8_short_inv_walsh4x4_1_c(short *input, short *mb_dqcoeff)
|
||||
{
|
||||
int i;
|
||||
int a1;
|
||||
|
||||
a1 = ((input[0] + 3) >> 3);
|
||||
for(i = 0; i < 16; i++)
|
||||
{
|
||||
mb_dqcoeff[i * 16] = a1;
|
||||
}
|
||||
}
|
||||
@@ -1,70 +0,0 @@
|
||||
/*
|
||||
* Copyright (c) 2010 The WebM project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
|
||||
#ifndef VP8_COMMON_INVTRANS_H_
|
||||
#define VP8_COMMON_INVTRANS_H_
|
||||
|
||||
#include "./vpx_config.h"
|
||||
#include "vp8_rtcd.h"
|
||||
#include "blockd.h"
|
||||
#include "onyxc_int.h"
|
||||
|
||||
#if CONFIG_MULTITHREAD
|
||||
#include "vpx_mem/vpx_mem.h"
|
||||
#endif
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
static void eob_adjust(char *eobs, short *diff)
|
||||
{
|
||||
/* eob adjust.... the idct can only skip if both the dc and eob are zero */
|
||||
int js;
|
||||
for(js = 0; js < 16; js++)
|
||||
{
|
||||
if((eobs[js] == 0) && (diff[0] != 0))
|
||||
eobs[js]++;
|
||||
diff+=16;
|
||||
}
|
||||
}
|
||||
|
||||
static INLINE void vp8_inverse_transform_mby(MACROBLOCKD *xd)
|
||||
{
|
||||
short *DQC = xd->dequant_y1;
|
||||
|
||||
if (xd->mode_info_context->mbmi.mode != SPLITMV)
|
||||
{
|
||||
/* do 2nd order transform on the dc block */
|
||||
if (xd->eobs[24] > 1)
|
||||
{
|
||||
vp8_short_inv_walsh4x4
|
||||
(&xd->block[24].dqcoeff[0], xd->qcoeff);
|
||||
}
|
||||
else
|
||||
{
|
||||
vp8_short_inv_walsh4x4_1
|
||||
(&xd->block[24].dqcoeff[0], xd->qcoeff);
|
||||
}
|
||||
eob_adjust(xd->eobs, xd->qcoeff);
|
||||
|
||||
DQC = xd->dequant_y1_dc;
|
||||
}
|
||||
vp8_dequant_idct_add_y_block
|
||||
(xd->qcoeff, DQC,
|
||||
xd->dst.y_buffer,
|
||||
xd->dst.y_stride, xd->eobs);
|
||||
}
|
||||
#ifdef __cplusplus
|
||||
} // extern "C"
|
||||
#endif
|
||||
|
||||
#endif // VP8_COMMON_INVTRANS_H_
|
||||
@@ -1,113 +0,0 @@
|
||||
/*
|
||||
* Copyright (c) 2010 The WebM project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
|
||||
#ifndef VP8_COMMON_LOOPFILTER_H_
|
||||
#define VP8_COMMON_LOOPFILTER_H_
|
||||
|
||||
#include "vpx_ports/mem.h"
|
||||
#include "vpx_config.h"
|
||||
#include "vp8_rtcd.h"
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
#define MAX_LOOP_FILTER 63
|
||||
/* fraction of total macroblock rows to be used in fast filter level picking */
|
||||
/* has to be > 2 */
|
||||
#define PARTIAL_FRAME_FRACTION 8
|
||||
|
||||
typedef enum
|
||||
{
|
||||
NORMAL_LOOPFILTER = 0,
|
||||
SIMPLE_LOOPFILTER = 1
|
||||
} LOOPFILTERTYPE;
|
||||
|
||||
#if ARCH_ARM
|
||||
#define SIMD_WIDTH 1
|
||||
#else
|
||||
#define SIMD_WIDTH 16
|
||||
#endif
|
||||
|
||||
/* Need to align this structure so when it is declared and
|
||||
* passed it can be loaded into vector registers.
|
||||
*/
|
||||
typedef struct
|
||||
{
|
||||
DECLARE_ALIGNED(SIMD_WIDTH, unsigned char, mblim[MAX_LOOP_FILTER + 1][SIMD_WIDTH]);
|
||||
DECLARE_ALIGNED(SIMD_WIDTH, unsigned char, blim[MAX_LOOP_FILTER + 1][SIMD_WIDTH]);
|
||||
DECLARE_ALIGNED(SIMD_WIDTH, unsigned char, lim[MAX_LOOP_FILTER + 1][SIMD_WIDTH]);
|
||||
DECLARE_ALIGNED(SIMD_WIDTH, unsigned char, hev_thr[4][SIMD_WIDTH]);
|
||||
unsigned char lvl[4][4][4];
|
||||
unsigned char hev_thr_lut[2][MAX_LOOP_FILTER + 1];
|
||||
unsigned char mode_lf_lut[10];
|
||||
} loop_filter_info_n;
|
||||
|
||||
typedef struct loop_filter_info
|
||||
{
|
||||
const unsigned char * mblim;
|
||||
const unsigned char * blim;
|
||||
const unsigned char * lim;
|
||||
const unsigned char * hev_thr;
|
||||
} loop_filter_info;
|
||||
|
||||
|
||||
typedef void loop_filter_uvfunction
|
||||
(
|
||||
unsigned char *u, /* source pointer */
|
||||
int p, /* pitch */
|
||||
const unsigned char *blimit,
|
||||
const unsigned char *limit,
|
||||
const unsigned char *thresh,
|
||||
unsigned char *v
|
||||
);
|
||||
|
||||
/* assorted loopfilter functions which get used elsewhere */
|
||||
struct VP8Common;
|
||||
struct macroblockd;
|
||||
struct modeinfo;
|
||||
|
||||
void vp8_loop_filter_init(struct VP8Common *cm);
|
||||
|
||||
void vp8_loop_filter_frame_init(struct VP8Common *cm,
|
||||
struct macroblockd *mbd,
|
||||
int default_filt_lvl);
|
||||
|
||||
void vp8_loop_filter_frame(struct VP8Common *cm, struct macroblockd *mbd,
|
||||
int frame_type);
|
||||
|
||||
void vp8_loop_filter_partial_frame(struct VP8Common *cm,
|
||||
struct macroblockd *mbd,
|
||||
int default_filt_lvl);
|
||||
|
||||
void vp8_loop_filter_frame_yonly(struct VP8Common *cm,
|
||||
struct macroblockd *mbd,
|
||||
int default_filt_lvl);
|
||||
|
||||
void vp8_loop_filter_update_sharpness(loop_filter_info_n *lfi,
|
||||
int sharpness_lvl);
|
||||
|
||||
void vp8_loop_filter_row_normal(struct VP8Common *cm,
|
||||
struct modeinfo *mode_info_context,
|
||||
int mb_row, int post_ystride, int post_uvstride,
|
||||
unsigned char *y_ptr, unsigned char *u_ptr,
|
||||
unsigned char *v_ptr);
|
||||
|
||||
void vp8_loop_filter_row_simple(struct VP8Common *cm,
|
||||
struct modeinfo *mode_info_context,
|
||||
int mb_row, int post_ystride, int post_uvstride,
|
||||
unsigned char *y_ptr, unsigned char *u_ptr,
|
||||
unsigned char *v_ptr);
|
||||
#ifdef __cplusplus
|
||||
} // extern "C"
|
||||
#endif
|
||||
|
||||
#endif // VP8_COMMON_LOOPFILTER_H_
|
||||
@@ -1,430 +0,0 @@
|
||||
/*
|
||||
* Copyright (c) 2010 The WebM project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
|
||||
#include <stdlib.h>
|
||||
#include "loopfilter.h"
|
||||
#include "onyxc_int.h"
|
||||
|
||||
typedef unsigned char uc;

static signed char vp8_signed_char_clamp(int t)
{
    t = (t < -128 ? -128 : t);
    t = (t > 127 ? 127 : t);
    return (signed char) t;
}


/* should we apply any filter at all ( 11111111 yes, 00000000 no) */
static signed char vp8_filter_mask(uc limit, uc blimit,
                                   uc p3, uc p2, uc p1, uc p0,
                                   uc q0, uc q1, uc q2, uc q3)
{
    signed char mask = 0;
    mask |= (abs(p3 - p2) > limit);
    mask |= (abs(p2 - p1) > limit);
    mask |= (abs(p1 - p0) > limit);
    mask |= (abs(q1 - q0) > limit);
    mask |= (abs(q2 - q1) > limit);
    mask |= (abs(q3 - q2) > limit);
    mask |= (abs(p0 - q0) * 2 + abs(p1 - q1) / 2 > blimit);
    return mask - 1;
}
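A quick aside on the return value (illustrative, not from the removed file): mask accumulates 0/1 comparison results, so `mask - 1` is -1 (all bits set) when every difference is within its limit and 0 when any limit is exceeded, which is what the 11111111 / 00000000 note above refers to.

#include <stdio.h>

int main(void)
{
    /* All comparisons false: mask stays 0, 0 - 1 == -1 == 0xff as a byte,
     * so the later AND with the filter value leaves it unchanged. */
    signed char filter_everything = 0 - 1;
    /* At least one comparison true: mask becomes 1, 1 - 1 == 0, so the
     * AND zeroes the filter value and no pixels are changed. */
    signed char filter_nothing = 1 - 1;
    printf("%02x %02x\n", (unsigned char)filter_everything,
           (unsigned char)filter_nothing);
    return 0;
}
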
|
||||
|
||||
/* is there high variance internal edge ( 11111111 yes, 00000000 no) */
|
||||
static signed char vp8_hevmask(uc thresh, uc p1, uc p0, uc q0, uc q1)
|
||||
{
|
||||
signed char hev = 0;
|
||||
hev |= (abs(p1 - p0) > thresh) * -1;
|
||||
hev |= (abs(q1 - q0) > thresh) * -1;
|
||||
return hev;
|
||||
}
|
||||
|
||||
static void vp8_filter(signed char mask, uc hev, uc *op1,
|
||||
uc *op0, uc *oq0, uc *oq1)
|
||||
|
||||
{
|
||||
signed char ps0, qs0;
|
||||
signed char ps1, qs1;
|
||||
signed char filter_value, Filter1, Filter2;
|
||||
signed char u;
|
||||
|
||||
ps1 = (signed char) * op1 ^ 0x80;
|
||||
ps0 = (signed char) * op0 ^ 0x80;
|
||||
qs0 = (signed char) * oq0 ^ 0x80;
|
||||
qs1 = (signed char) * oq1 ^ 0x80;
|
||||
|
||||
/* add outer taps if we have high edge variance */
|
||||
filter_value = vp8_signed_char_clamp(ps1 - qs1);
|
||||
filter_value &= hev;
|
||||
|
||||
/* inner taps */
|
||||
filter_value = vp8_signed_char_clamp(filter_value + 3 * (qs0 - ps0));
|
||||
filter_value &= mask;
|
||||
|
||||
/* save bottom 3 bits so that we round one side +4 and the other +3
|
||||
* if it equals 4 we'll set to adjust by -1 to account for the fact
|
||||
* we'd round 3 the other way
|
||||
*/
|
||||
Filter1 = vp8_signed_char_clamp(filter_value + 4);
|
||||
Filter2 = vp8_signed_char_clamp(filter_value + 3);
|
||||
Filter1 >>= 3;
|
||||
Filter2 >>= 3;
|
||||
u = vp8_signed_char_clamp(qs0 - Filter1);
|
||||
*oq0 = u ^ 0x80;
|
||||
u = vp8_signed_char_clamp(ps0 + Filter2);
|
||||
*op0 = u ^ 0x80;
|
||||
filter_value = Filter1;
|
||||
|
||||
/* outer tap adjustments */
|
||||
filter_value += 1;
|
||||
filter_value >>= 1;
|
||||
filter_value &= ~hev;
|
||||
|
||||
u = vp8_signed_char_clamp(qs1 - filter_value);
|
||||
*oq1 = u ^ 0x80;
|
||||
u = vp8_signed_char_clamp(ps1 + filter_value);
|
||||
*op1 = u ^ 0x80;
|
||||
|
||||
}
|
||||
void vp8_loop_filter_horizontal_edge_c
|
||||
(
|
||||
unsigned char *s,
|
||||
int p, /* pitch */
|
||||
const unsigned char *blimit,
|
||||
const unsigned char *limit,
|
||||
const unsigned char *thresh,
|
||||
int count
|
||||
)
|
||||
{
|
||||
int hev = 0; /* high edge variance */
|
||||
signed char mask = 0;
|
||||
int i = 0;
|
||||
|
||||
/* loop filter designed to work using chars so that we can make maximum use
|
||||
* of 8 bit simd instructions.
|
||||
*/
|
||||
do
|
||||
{
|
||||
mask = vp8_filter_mask(limit[0], blimit[0],
|
||||
s[-4*p], s[-3*p], s[-2*p], s[-1*p],
|
||||
s[0*p], s[1*p], s[2*p], s[3*p]);
|
||||
|
||||
hev = vp8_hevmask(thresh[0], s[-2*p], s[-1*p], s[0*p], s[1*p]);
|
||||
|
||||
vp8_filter(mask, hev, s - 2 * p, s - 1 * p, s, s + 1 * p);
|
||||
|
||||
++s;
|
||||
}
|
||||
while (++i < count * 8);
|
||||
}
|
||||
|
||||
void vp8_loop_filter_vertical_edge_c
|
||||
(
|
||||
unsigned char *s,
|
||||
int p,
|
||||
const unsigned char *blimit,
|
||||
const unsigned char *limit,
|
||||
const unsigned char *thresh,
|
||||
int count
|
||||
)
|
||||
{
|
||||
int hev = 0; /* high edge variance */
|
||||
signed char mask = 0;
|
||||
int i = 0;
|
||||
|
||||
/* loop filter designed to work using chars so that we can make maximum use
|
||||
* of 8 bit simd instructions.
|
||||
*/
|
||||
do
|
||||
{
|
||||
mask = vp8_filter_mask(limit[0], blimit[0],
|
||||
s[-4], s[-3], s[-2], s[-1], s[0], s[1], s[2], s[3]);
|
||||
|
||||
hev = vp8_hevmask(thresh[0], s[-2], s[-1], s[0], s[1]);
|
||||
|
||||
vp8_filter(mask, hev, s - 2, s - 1, s, s + 1);
|
||||
|
||||
s += p;
|
||||
}
|
||||
while (++i < count * 8);
|
||||
}
|
||||
|
||||
static void vp8_mbfilter(signed char mask, uc hev,
|
||||
uc *op2, uc *op1, uc *op0, uc *oq0, uc *oq1, uc *oq2)
|
||||
{
|
||||
signed char s, u;
|
||||
signed char filter_value, Filter1, Filter2;
|
||||
signed char ps2 = (signed char) * op2 ^ 0x80;
|
||||
signed char ps1 = (signed char) * op1 ^ 0x80;
|
||||
signed char ps0 = (signed char) * op0 ^ 0x80;
|
||||
signed char qs0 = (signed char) * oq0 ^ 0x80;
|
||||
signed char qs1 = (signed char) * oq1 ^ 0x80;
|
||||
signed char qs2 = (signed char) * oq2 ^ 0x80;
|
||||
|
||||
/* add outer taps if we have high edge variance */
|
||||
filter_value = vp8_signed_char_clamp(ps1 - qs1);
|
||||
filter_value = vp8_signed_char_clamp(filter_value + 3 * (qs0 - ps0));
|
||||
filter_value &= mask;
|
||||
|
||||
Filter2 = filter_value;
|
||||
Filter2 &= hev;
|
||||
|
||||
/* save bottom 3 bits so that we round one side +4 and the other +3 */
|
||||
Filter1 = vp8_signed_char_clamp(Filter2 + 4);
|
||||
Filter2 = vp8_signed_char_clamp(Filter2 + 3);
|
||||
Filter1 >>= 3;
|
||||
Filter2 >>= 3;
|
||||
qs0 = vp8_signed_char_clamp(qs0 - Filter1);
|
||||
ps0 = vp8_signed_char_clamp(ps0 + Filter2);
|
||||
|
||||
|
||||
/* only apply wider filter if not high edge variance */
|
||||
filter_value &= ~hev;
|
||||
Filter2 = filter_value;
|
||||
|
||||
/* roughly 3/7th difference across boundary */
|
||||
u = vp8_signed_char_clamp((63 + Filter2 * 27) >> 7);
|
||||
s = vp8_signed_char_clamp(qs0 - u);
|
||||
*oq0 = s ^ 0x80;
|
||||
s = vp8_signed_char_clamp(ps0 + u);
|
||||
*op0 = s ^ 0x80;
|
||||
|
||||
/* roughly 2/7th difference across boundary */
|
||||
u = vp8_signed_char_clamp((63 + Filter2 * 18) >> 7);
|
||||
s = vp8_signed_char_clamp(qs1 - u);
|
||||
*oq1 = s ^ 0x80;
|
||||
s = vp8_signed_char_clamp(ps1 + u);
|
||||
*op1 = s ^ 0x80;
|
||||
|
||||
/* roughly 1/7th difference across boundary */
|
||||
u = vp8_signed_char_clamp((63 + Filter2 * 9) >> 7);
|
||||
s = vp8_signed_char_clamp(qs2 - u);
|
||||
*oq2 = s ^ 0x80;
|
||||
s = vp8_signed_char_clamp(ps2 + u);
|
||||
*op2 = s ^ 0x80;
|
||||
}
|
||||
|
||||
void vp8_mbloop_filter_horizontal_edge_c
|
||||
(
|
||||
unsigned char *s,
|
||||
int p,
|
||||
const unsigned char *blimit,
|
||||
const unsigned char *limit,
|
||||
const unsigned char *thresh,
|
||||
int count
|
||||
)
|
||||
{
|
||||
signed char hev = 0; /* high edge variance */
|
||||
signed char mask = 0;
|
||||
int i = 0;
|
||||
|
||||
/* loop filter designed to work using chars so that we can make maximum use
|
||||
* of 8 bit simd instructions.
|
||||
*/
|
||||
do
|
||||
{
|
||||
|
||||
mask = vp8_filter_mask(limit[0], blimit[0],
|
||||
s[-4*p], s[-3*p], s[-2*p], s[-1*p],
|
||||
s[0*p], s[1*p], s[2*p], s[3*p]);
|
||||
|
||||
hev = vp8_hevmask(thresh[0], s[-2*p], s[-1*p], s[0*p], s[1*p]);
|
||||
|
||||
vp8_mbfilter(mask, hev, s - 3 * p, s - 2 * p, s - 1 * p, s, s + 1 * p, s + 2 * p);
|
||||
|
||||
++s;
|
||||
}
|
||||
while (++i < count * 8);
|
||||
|
||||
}
|
||||
|
||||
|
||||
void vp8_mbloop_filter_vertical_edge_c
|
||||
(
|
||||
unsigned char *s,
|
||||
int p,
|
||||
const unsigned char *blimit,
|
||||
const unsigned char *limit,
|
||||
const unsigned char *thresh,
|
||||
int count
|
||||
)
|
||||
{
|
||||
signed char hev = 0; /* high edge variance */
|
||||
signed char mask = 0;
|
||||
int i = 0;
|
||||
|
||||
do
|
||||
{
|
||||
|
||||
mask = vp8_filter_mask(limit[0], blimit[0],
|
||||
s[-4], s[-3], s[-2], s[-1], s[0], s[1], s[2], s[3]);
|
||||
|
||||
hev = vp8_hevmask(thresh[0], s[-2], s[-1], s[0], s[1]);
|
||||
|
||||
vp8_mbfilter(mask, hev, s - 3, s - 2, s - 1, s, s + 1, s + 2);
|
||||
|
||||
s += p;
|
||||
}
|
||||
while (++i < count * 8);
|
||||
|
||||
}
|
||||
|
||||
/* should we apply any filter at all ( 11111111 yes, 00000000 no) */
|
||||
static signed char vp8_simple_filter_mask(uc blimit, uc p1, uc p0, uc q0, uc q1)
|
||||
{
|
||||
/* Why does this cause problems for win32?
|
||||
* error C2143: syntax error : missing ';' before 'type'
|
||||
* (void) limit;
|
||||
*/
|
||||
signed char mask = (abs(p0 - q0) * 2 + abs(p1 - q1) / 2 <= blimit) * -1;
|
||||
return mask;
|
||||
}
|
||||
|
||||
static void vp8_simple_filter(signed char mask, uc *op1, uc *op0, uc *oq0, uc *oq1)
|
||||
{
|
||||
signed char filter_value, Filter1, Filter2;
|
||||
signed char p1 = (signed char) * op1 ^ 0x80;
|
||||
signed char p0 = (signed char) * op0 ^ 0x80;
|
||||
signed char q0 = (signed char) * oq0 ^ 0x80;
|
||||
signed char q1 = (signed char) * oq1 ^ 0x80;
|
||||
signed char u;
|
||||
|
||||
filter_value = vp8_signed_char_clamp(p1 - q1);
|
||||
filter_value = vp8_signed_char_clamp(filter_value + 3 * (q0 - p0));
|
||||
filter_value &= mask;
|
||||
|
||||
/* save bottom 3 bits so that we round one side +4 and the other +3 */
|
||||
Filter1 = vp8_signed_char_clamp(filter_value + 4);
|
||||
Filter1 >>= 3;
|
||||
u = vp8_signed_char_clamp(q0 - Filter1);
|
||||
*oq0 = u ^ 0x80;
|
||||
|
||||
Filter2 = vp8_signed_char_clamp(filter_value + 3);
|
||||
Filter2 >>= 3;
|
||||
u = vp8_signed_char_clamp(p0 + Filter2);
|
||||
*op0 = u ^ 0x80;
|
||||
}
|
||||
|
||||
void vp8_loop_filter_simple_horizontal_edge_c
|
||||
(
|
||||
unsigned char *s,
|
||||
int p,
|
||||
const unsigned char *blimit
|
||||
)
|
||||
{
|
||||
signed char mask = 0;
|
||||
int i = 0;
|
||||
|
||||
do
|
||||
{
|
||||
mask = vp8_simple_filter_mask(blimit[0], s[-2*p], s[-1*p], s[0*p], s[1*p]);
|
||||
vp8_simple_filter(mask, s - 2 * p, s - 1 * p, s, s + 1 * p);
|
||||
++s;
|
||||
}
|
||||
while (++i < 16);
|
||||
}
|
||||
|
||||
void vp8_loop_filter_simple_vertical_edge_c
|
||||
(
|
||||
unsigned char *s,
|
||||
int p,
|
||||
const unsigned char *blimit
|
||||
)
|
||||
{
|
||||
signed char mask = 0;
|
||||
int i = 0;
|
||||
|
||||
do
|
||||
{
|
||||
mask = vp8_simple_filter_mask(blimit[0], s[-2], s[-1], s[0], s[1]);
|
||||
vp8_simple_filter(mask, s - 2, s - 1, s, s + 1);
|
||||
s += p;
|
||||
}
|
||||
while (++i < 16);
|
||||
|
||||
}
|
||||
|
||||
/* Horizontal MB filtering */
|
||||
void vp8_loop_filter_mbh_c(unsigned char *y_ptr, unsigned char *u_ptr,
|
||||
unsigned char *v_ptr, int y_stride, int uv_stride,
|
||||
loop_filter_info *lfi)
|
||||
{
|
||||
vp8_mbloop_filter_horizontal_edge_c(y_ptr, y_stride, lfi->mblim, lfi->lim, lfi->hev_thr, 2);
|
||||
|
||||
if (u_ptr)
|
||||
vp8_mbloop_filter_horizontal_edge_c(u_ptr, uv_stride, lfi->mblim, lfi->lim, lfi->hev_thr, 1);
|
||||
|
||||
if (v_ptr)
|
||||
vp8_mbloop_filter_horizontal_edge_c(v_ptr, uv_stride, lfi->mblim, lfi->lim, lfi->hev_thr, 1);
|
||||
}
|
||||
|
||||
/* Vertical MB Filtering */
|
||||
void vp8_loop_filter_mbv_c(unsigned char *y_ptr, unsigned char *u_ptr,
|
||||
unsigned char *v_ptr, int y_stride, int uv_stride,
|
||||
loop_filter_info *lfi)
|
||||
{
|
||||
vp8_mbloop_filter_vertical_edge_c(y_ptr, y_stride, lfi->mblim, lfi->lim, lfi->hev_thr, 2);
|
||||
|
||||
if (u_ptr)
|
||||
vp8_mbloop_filter_vertical_edge_c(u_ptr, uv_stride, lfi->mblim, lfi->lim, lfi->hev_thr, 1);
|
||||
|
||||
if (v_ptr)
|
||||
vp8_mbloop_filter_vertical_edge_c(v_ptr, uv_stride, lfi->mblim, lfi->lim, lfi->hev_thr, 1);
|
||||
}
|
||||
|
||||
/* Horizontal B Filtering */
|
||||
void vp8_loop_filter_bh_c(unsigned char *y_ptr, unsigned char *u_ptr,
|
||||
unsigned char *v_ptr, int y_stride, int uv_stride,
|
||||
loop_filter_info *lfi)
|
||||
{
|
||||
vp8_loop_filter_horizontal_edge_c(y_ptr + 4 * y_stride, y_stride, lfi->blim, lfi->lim, lfi->hev_thr, 2);
|
||||
vp8_loop_filter_horizontal_edge_c(y_ptr + 8 * y_stride, y_stride, lfi->blim, lfi->lim, lfi->hev_thr, 2);
|
||||
vp8_loop_filter_horizontal_edge_c(y_ptr + 12 * y_stride, y_stride, lfi->blim, lfi->lim, lfi->hev_thr, 2);
|
||||
|
||||
if (u_ptr)
|
||||
vp8_loop_filter_horizontal_edge_c(u_ptr + 4 * uv_stride, uv_stride, lfi->blim, lfi->lim, lfi->hev_thr, 1);
|
||||
|
||||
if (v_ptr)
|
||||
vp8_loop_filter_horizontal_edge_c(v_ptr + 4 * uv_stride, uv_stride, lfi->blim, lfi->lim, lfi->hev_thr, 1);
|
||||
}
|
||||
|
||||
void vp8_loop_filter_bhs_c(unsigned char *y_ptr, int y_stride,
|
||||
const unsigned char *blimit)
|
||||
{
|
||||
vp8_loop_filter_simple_horizontal_edge_c(y_ptr + 4 * y_stride, y_stride, blimit);
|
||||
vp8_loop_filter_simple_horizontal_edge_c(y_ptr + 8 * y_stride, y_stride, blimit);
|
||||
vp8_loop_filter_simple_horizontal_edge_c(y_ptr + 12 * y_stride, y_stride, blimit);
|
||||
}
|
||||
|
||||
/* Vertical B Filtering */
|
||||
void vp8_loop_filter_bv_c(unsigned char *y_ptr, unsigned char *u_ptr,
|
||||
unsigned char *v_ptr, int y_stride, int uv_stride,
|
||||
loop_filter_info *lfi)
|
||||
{
|
||||
vp8_loop_filter_vertical_edge_c(y_ptr + 4, y_stride, lfi->blim, lfi->lim, lfi->hev_thr, 2);
|
||||
vp8_loop_filter_vertical_edge_c(y_ptr + 8, y_stride, lfi->blim, lfi->lim, lfi->hev_thr, 2);
|
||||
vp8_loop_filter_vertical_edge_c(y_ptr + 12, y_stride, lfi->blim, lfi->lim, lfi->hev_thr, 2);
|
||||
|
||||
if (u_ptr)
|
||||
vp8_loop_filter_vertical_edge_c(u_ptr + 4, uv_stride, lfi->blim, lfi->lim, lfi->hev_thr, 1);
|
||||
|
||||
if (v_ptr)
|
||||
vp8_loop_filter_vertical_edge_c(v_ptr + 4, uv_stride, lfi->blim, lfi->lim, lfi->hev_thr, 1);
|
||||
}
|
||||
|
||||
void vp8_loop_filter_bvs_c(unsigned char *y_ptr, int y_stride,
|
||||
const unsigned char *blimit)
|
||||
{
|
||||
vp8_loop_filter_simple_vertical_edge_c(y_ptr + 4, y_stride, blimit);
|
||||
vp8_loop_filter_simple_vertical_edge_c(y_ptr + 8, y_stride, blimit);
|
||||
vp8_loop_filter_simple_vertical_edge_c(y_ptr + 12, y_stride, blimit);
|
||||
}
|
||||
@@ -1,68 +0,0 @@
|
||||
/*
|
||||
* Copyright (c) 2010 The WebM project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
|
||||
#include "blockd.h"
|
||||
|
||||
void vp8_setup_block_dptrs(MACROBLOCKD *x)
|
||||
{
|
||||
int r, c;
|
||||
|
||||
for (r = 0; r < 4; r++)
|
||||
{
|
||||
for (c = 0; c < 4; c++)
|
||||
{
|
||||
x->block[r*4+c].predictor = x->predictor + r * 4 * 16 + c * 4;
|
||||
}
|
||||
}
|
||||
|
||||
for (r = 0; r < 2; r++)
|
||||
{
|
||||
for (c = 0; c < 2; c++)
|
||||
{
|
||||
x->block[16+r*2+c].predictor = x->predictor + 256 + r * 4 * 8 + c * 4;
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
for (r = 0; r < 2; r++)
|
||||
{
|
||||
for (c = 0; c < 2; c++)
|
||||
{
|
||||
x->block[20+r*2+c].predictor = x->predictor + 320 + r * 4 * 8 + c * 4;
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
for (r = 0; r < 25; r++)
|
||||
{
|
||||
x->block[r].qcoeff = x->qcoeff + r * 16;
|
||||
x->block[r].dqcoeff = x->dqcoeff + r * 16;
|
||||
x->block[r].eob = x->eobs + r;
|
||||
}
|
||||
}
|
||||
|
||||
void vp8_build_block_doffsets(MACROBLOCKD *x)
|
||||
{
|
||||
int block;
|
||||
|
||||
for (block = 0; block < 16; block++) /* y blocks */
|
||||
{
|
||||
x->block[block].offset =
|
||||
(block >> 2) * 4 * x->dst.y_stride + (block & 3) * 4;
|
||||
}
|
||||
|
||||
for (block = 16; block < 20; block++) /* U and V blocks */
|
||||
{
|
||||
x->block[block+4].offset =
|
||||
x->block[block].offset =
|
||||
((block - 16) >> 1) * 4 * x->dst.uv_stride + (block & 1) * 4;
|
||||
}
|
||||
}
|
||||
@@ -1,386 +0,0 @@
|
||||
/*
|
||||
* Copyright (c) 2012 The WebM project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
|
||||
/* MFQE: Multiframe Quality Enhancement
 * In rate-limited situations keyframes may cause significant visual artifacts
 * commonly referred to as "popping." This file implements a postprocessing
 * algorithm which blends data from the preceding frame when there is no
 * motion and the q from the previous frame is lower, which indicates that it
 * is of higher quality.
 */

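To make the blending concrete, here is a standalone sketch of the weighted mix performed by filter_by_weight() below; MFQE_PRECISION is defined in vp8/common/postproc.h, and the value 4 used here is only an assumption for illustration.

#include <stdio.h>

#define MFQE_PRECISION 4   /* assumed value for this sketch; the real one is in postproc.h */

/* Mix two co-located pixels with weights that sum to 1 << MFQE_PRECISION,
 * mirroring the inner loop of filter_by_weight() below. */
static unsigned char blend(unsigned char src, unsigned char dst, int src_weight)
{
    int dst_weight = (1 << MFQE_PRECISION) - src_weight;
    int rounding_bit = 1 << (MFQE_PRECISION - 1);
    return (unsigned char)((src * src_weight + dst * dst_weight + rounding_bit)
                           >> MFQE_PRECISION);
}

int main(void)
{
    /* A 12/16 source weight keeps 75% of src and 25% of dst. */
    printf("%d\n", blend(100, 180, 12)); /* (1200 + 720 + 8) >> 4 = 120 */
    return 0;
}
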
#include "./vp8_rtcd.h"
|
||||
#include "./vpx_dsp_rtcd.h"
|
||||
#include "vp8/common/postproc.h"
|
||||
#include "vpx_dsp/variance.h"
|
||||
#include "vpx_mem/vpx_mem.h"
|
||||
#include "vpx_scale/yv12config.h"
|
||||
|
||||
#include <limits.h>
|
||||
#include <stdlib.h>
|
||||
|
||||
static void filter_by_weight(unsigned char *src, int src_stride,
|
||||
unsigned char *dst, int dst_stride,
|
||||
int block_size, int src_weight)
|
||||
{
|
||||
int dst_weight = (1 << MFQE_PRECISION) - src_weight;
|
||||
int rounding_bit = 1 << (MFQE_PRECISION - 1);
|
||||
int r, c;
|
||||
|
||||
for (r = 0; r < block_size; r++)
|
||||
{
|
||||
for (c = 0; c < block_size; c++)
|
||||
{
|
||||
dst[c] = (src[c] * src_weight +
|
||||
dst[c] * dst_weight +
|
||||
rounding_bit) >> MFQE_PRECISION;
|
||||
}
|
||||
src += src_stride;
|
||||
dst += dst_stride;
|
||||
}
|
||||
}
|
||||
|
||||
void vp8_filter_by_weight16x16_c(unsigned char *src, int src_stride,
|
||||
unsigned char *dst, int dst_stride,
|
||||
int src_weight)
|
||||
{
|
||||
filter_by_weight(src, src_stride, dst, dst_stride, 16, src_weight);
|
||||
}
|
||||
|
||||
void vp8_filter_by_weight8x8_c(unsigned char *src, int src_stride,
|
||||
unsigned char *dst, int dst_stride,
|
||||
int src_weight)
|
||||
{
|
||||
filter_by_weight(src, src_stride, dst, dst_stride, 8, src_weight);
|
||||
}
|
||||
|
||||
void vp8_filter_by_weight4x4_c(unsigned char *src, int src_stride,
|
||||
unsigned char *dst, int dst_stride,
|
||||
int src_weight)
|
||||
{
|
||||
filter_by_weight(src, src_stride, dst, dst_stride, 4, src_weight);
|
||||
}
|
||||
|
||||
static void apply_ifactor(unsigned char *y_src,
|
||||
int y_src_stride,
|
||||
unsigned char *y_dst,
|
||||
int y_dst_stride,
|
||||
unsigned char *u_src,
|
||||
unsigned char *v_src,
|
||||
int uv_src_stride,
|
||||
unsigned char *u_dst,
|
||||
unsigned char *v_dst,
|
||||
int uv_dst_stride,
|
||||
int block_size,
|
||||
int src_weight)
|
||||
{
|
||||
if (block_size == 16)
|
||||
{
|
||||
vp8_filter_by_weight16x16(y_src, y_src_stride, y_dst, y_dst_stride, src_weight);
|
||||
vp8_filter_by_weight8x8(u_src, uv_src_stride, u_dst, uv_dst_stride, src_weight);
|
||||
vp8_filter_by_weight8x8(v_src, uv_src_stride, v_dst, uv_dst_stride, src_weight);
|
||||
}
|
||||
else /* if (block_size == 8) */
|
||||
{
|
||||
vp8_filter_by_weight8x8(y_src, y_src_stride, y_dst, y_dst_stride, src_weight);
|
||||
vp8_filter_by_weight4x4(u_src, uv_src_stride, u_dst, uv_dst_stride, src_weight);
|
||||
vp8_filter_by_weight4x4(v_src, uv_src_stride, v_dst, uv_dst_stride, src_weight);
|
||||
}
|
||||
}
|
||||
|
||||
static unsigned int int_sqrt(unsigned int x)
{
    unsigned int y = x;
    unsigned int guess;
    int p = 1;
    while (y>>=1) p++;
    p>>=1;

    guess=0;
    while (p>=0)
    {
        guess |= (1<<p);
        if (x<guess*guess)
            guess -= (1<<p);
        p--;
    }
    /* choose between guess or guess+1 */
    return guess+(guess*guess+guess+1<=x);
}
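Not part of the file, just a check of the final rounding step above: after the bit-by-bit loop, guess is floor(sqrt(x)), and the return expression adds 1 exactly when guess*guess + guess + 1 <= x, so the result is the nearest integer rather than the floor.

#include <stdio.h>

int main(void)
{
    unsigned int x = 50, guess = 7;   /* floor(sqrt(50)) found by the loop */
    unsigned int nearest = guess + (guess * guess + guess + 1 <= x);
    printf("%u\n", nearest);          /* 57 <= 50 is false, so 7 (sqrt(50) ~ 7.07) */
    return 0;
}
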
|
||||
|
||||
#define USE_SSD
|
||||
static void multiframe_quality_enhance_block
|
||||
(
|
||||
int blksize, /* Currently only values supported are 16, 8 */
|
||||
int qcurr,
|
||||
int qprev,
|
||||
unsigned char *y,
|
||||
unsigned char *u,
|
||||
unsigned char *v,
|
||||
int y_stride,
|
||||
int uv_stride,
|
||||
unsigned char *yd,
|
||||
unsigned char *ud,
|
||||
unsigned char *vd,
|
||||
int yd_stride,
|
||||
int uvd_stride
|
||||
)
|
||||
{
|
||||
static const unsigned char VP8_ZEROS[16]=
|
||||
{
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
|
||||
};
|
||||
int uvblksize = blksize >> 1;
|
||||
int qdiff = qcurr - qprev;
|
||||
|
||||
int i;
|
||||
unsigned char *up;
|
||||
unsigned char *udp;
|
||||
unsigned char *vp;
|
||||
unsigned char *vdp;
|
||||
|
||||
unsigned int act, actd, sad, usad, vsad, sse, thr, thrsq, actrisk;
|
||||
|
||||
if (blksize == 16)
|
||||
{
|
||||
actd = (vpx_variance16x16(yd, yd_stride, VP8_ZEROS, 0, &sse)+128)>>8;
|
||||
act = (vpx_variance16x16(y, y_stride, VP8_ZEROS, 0, &sse)+128)>>8;
|
||||
#ifdef USE_SSD
|
||||
vpx_variance16x16(y, y_stride, yd, yd_stride, &sse);
|
||||
sad = (sse + 128)>>8;
|
||||
vpx_variance8x8(u, uv_stride, ud, uvd_stride, &sse);
|
||||
usad = (sse + 32)>>6;
|
||||
vpx_variance8x8(v, uv_stride, vd, uvd_stride, &sse);
|
||||
vsad = (sse + 32)>>6;
|
||||
#else
|
||||
sad = (vpx_sad16x16(y, y_stride, yd, yd_stride) + 128) >> 8;
|
||||
usad = (vpx_sad8x8(u, uv_stride, ud, uvd_stride) + 32) >> 6;
|
||||
vsad = (vpx_sad8x8(v, uv_stride, vd, uvd_stride)+ 32) >> 6;
|
||||
#endif
|
||||
}
|
||||
else /* if (blksize == 8) */
|
||||
{
|
||||
actd = (vpx_variance8x8(yd, yd_stride, VP8_ZEROS, 0, &sse)+32)>>6;
|
||||
act = (vpx_variance8x8(y, y_stride, VP8_ZEROS, 0, &sse)+32)>>6;
|
||||
#ifdef USE_SSD
|
||||
vpx_variance8x8(y, y_stride, yd, yd_stride, &sse);
|
||||
sad = (sse + 32)>>6;
|
||||
vpx_variance4x4(u, uv_stride, ud, uvd_stride, &sse);
|
||||
usad = (sse + 8)>>4;
|
||||
vpx_variance4x4(v, uv_stride, vd, uvd_stride, &sse);
|
||||
vsad = (sse + 8)>>4;
|
||||
#else
|
||||
sad = (vpx_sad8x8(y, y_stride, yd, yd_stride) + 32) >> 6;
|
||||
usad = (vpx_sad4x4(u, uv_stride, ud, uvd_stride) + 8) >> 4;
|
||||
vsad = (vpx_sad4x4(v, uv_stride, vd, uvd_stride) + 8) >> 4;
|
||||
#endif
|
||||
}
|
||||
|
||||
actrisk = (actd > act * 5);
|
||||
|
||||
/* thr = qdiff/16 + log2(act) + log4(qprev) */
|
||||
thr = (qdiff >> 4);
|
||||
while (actd >>= 1) thr++;
|
||||
while (qprev >>= 2) thr++;
|
||||
|
||||
#ifdef USE_SSD
|
||||
thrsq = thr * thr;
|
||||
if (sad < thrsq &&
|
||||
/* additional checks for color mismatch and excessive addition of
|
||||
* high-frequencies */
|
||||
4 * usad < thrsq && 4 * vsad < thrsq && !actrisk)
|
||||
#else
|
||||
if (sad < thr &&
|
||||
/* additional checks for color mismatch and excessive addition of
|
||||
* high-frequencies */
|
||||
2 * usad < thr && 2 * vsad < thr && !actrisk)
|
||||
#endif
|
||||
{
|
||||
int ifactor;
|
||||
#ifdef USE_SSD
|
||||
/* TODO: optimize this later to not need sqr root */
|
||||
sad = int_sqrt(sad);
|
||||
#endif
|
||||
ifactor = (sad << MFQE_PRECISION) / thr;
|
||||
ifactor >>= (qdiff >> 5);
|
||||
|
||||
if (ifactor)
|
||||
{
|
||||
apply_ifactor(y, y_stride, yd, yd_stride,
|
||||
u, v, uv_stride,
|
||||
ud, vd, uvd_stride,
|
||||
blksize, ifactor);
|
||||
}
|
||||
}
|
||||
else /* else implicitly copy from previous frame */
|
||||
{
|
||||
if (blksize == 16)
|
||||
{
|
||||
vp8_copy_mem16x16(y, y_stride, yd, yd_stride);
|
||||
vp8_copy_mem8x8(u, uv_stride, ud, uvd_stride);
|
||||
vp8_copy_mem8x8(v, uv_stride, vd, uvd_stride);
|
||||
}
|
||||
else /* if (blksize == 8) */
|
||||
{
|
||||
vp8_copy_mem8x8(y, y_stride, yd, yd_stride);
|
||||
for (up = u, udp = ud, i = 0; i < uvblksize; ++i, up += uv_stride, udp += uvd_stride)
|
||||
memcpy(udp, up, uvblksize);
|
||||
for (vp = v, vdp = vd, i = 0; i < uvblksize; ++i, vp += uv_stride, vdp += uvd_stride)
|
||||
memcpy(vdp, vp, uvblksize);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static int qualify_inter_mb(const MODE_INFO *mode_info_context, int *map)
|
||||
{
|
||||
if (mode_info_context->mbmi.mb_skip_coeff)
|
||||
map[0] = map[1] = map[2] = map[3] = 1;
|
||||
else if (mode_info_context->mbmi.mode==SPLITMV)
|
||||
{
|
||||
static int ndx[4][4] =
|
||||
{
|
||||
{0, 1, 4, 5},
|
||||
{2, 3, 6, 7},
|
||||
{8, 9, 12, 13},
|
||||
{10, 11, 14, 15}
|
||||
};
|
||||
int i, j;
|
||||
for (i=0; i<4; ++i)
|
||||
{
|
||||
map[i] = 1;
|
||||
for (j=0; j<4 && map[j]; ++j)
|
||||
map[i] &= (mode_info_context->bmi[ndx[i][j]].mv.as_mv.row <= 2 &&
|
||||
mode_info_context->bmi[ndx[i][j]].mv.as_mv.col <= 2);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
map[0] = map[1] = map[2] = map[3] =
|
||||
(mode_info_context->mbmi.mode > B_PRED &&
|
||||
abs(mode_info_context->mbmi.mv.as_mv.row) <= 2 &&
|
||||
abs(mode_info_context->mbmi.mv.as_mv.col) <= 2);
|
||||
}
|
||||
return (map[0]+map[1]+map[2]+map[3]);
|
||||
}
|
||||
|
||||
void vp8_multiframe_quality_enhance
|
||||
(
|
||||
VP8_COMMON *cm
|
||||
)
|
||||
{
|
||||
YV12_BUFFER_CONFIG *show = cm->frame_to_show;
|
||||
YV12_BUFFER_CONFIG *dest = &cm->post_proc_buffer;
|
||||
|
||||
FRAME_TYPE frame_type = cm->frame_type;
|
||||
/* Point at base of Mb MODE_INFO list has motion vectors etc */
|
||||
const MODE_INFO *mode_info_context = cm->show_frame_mi;
|
||||
int mb_row;
|
||||
int mb_col;
|
||||
int totmap, map[4];
|
||||
int qcurr = cm->base_qindex;
|
||||
int qprev = cm->postproc_state.last_base_qindex;
|
||||
|
||||
unsigned char *y_ptr, *u_ptr, *v_ptr;
|
||||
unsigned char *yd_ptr, *ud_ptr, *vd_ptr;
|
||||
|
||||
/* Set up the buffer pointers */
|
||||
y_ptr = show->y_buffer;
|
||||
u_ptr = show->u_buffer;
|
||||
v_ptr = show->v_buffer;
|
||||
yd_ptr = dest->y_buffer;
|
||||
ud_ptr = dest->u_buffer;
|
||||
vd_ptr = dest->v_buffer;
|
||||
|
||||
/* postprocess each macro block */
|
||||
for (mb_row = 0; mb_row < cm->mb_rows; mb_row++)
|
||||
{
|
||||
for (mb_col = 0; mb_col < cm->mb_cols; mb_col++)
|
||||
{
|
||||
/* if motion is high there will likely be no benefit */
|
||||
if (frame_type == INTER_FRAME) totmap = qualify_inter_mb(mode_info_context, map);
|
||||
else totmap = (frame_type == KEY_FRAME ? 4 : 0);
|
||||
if (totmap)
|
||||
{
|
||||
if (totmap < 4)
|
||||
{
|
||||
int i, j;
|
||||
for (i=0; i<2; ++i)
|
||||
for (j=0; j<2; ++j)
|
||||
{
|
||||
if (map[i*2+j])
|
||||
{
|
||||
multiframe_quality_enhance_block(8, qcurr, qprev,
|
||||
y_ptr + 8*(i*show->y_stride+j),
|
||||
u_ptr + 4*(i*show->uv_stride+j),
|
||||
v_ptr + 4*(i*show->uv_stride+j),
|
||||
show->y_stride,
|
||||
show->uv_stride,
|
||||
yd_ptr + 8*(i*dest->y_stride+j),
|
||||
ud_ptr + 4*(i*dest->uv_stride+j),
|
||||
vd_ptr + 4*(i*dest->uv_stride+j),
|
||||
dest->y_stride,
|
||||
dest->uv_stride);
|
||||
}
|
||||
else
|
||||
{
|
||||
/* copy a 8x8 block */
|
||||
int k;
|
||||
unsigned char *up = u_ptr + 4*(i*show->uv_stride+j);
|
||||
unsigned char *udp = ud_ptr + 4*(i*dest->uv_stride+j);
|
||||
unsigned char *vp = v_ptr + 4*(i*show->uv_stride+j);
|
||||
unsigned char *vdp = vd_ptr + 4*(i*dest->uv_stride+j);
|
||||
vp8_copy_mem8x8(y_ptr + 8*(i*show->y_stride+j), show->y_stride,
|
||||
yd_ptr + 8*(i*dest->y_stride+j), dest->y_stride);
|
||||
for (k = 0; k < 4; ++k, up += show->uv_stride, udp += dest->uv_stride,
|
||||
vp += show->uv_stride, vdp += dest->uv_stride)
|
||||
{
|
||||
memcpy(udp, up, 4);
|
||||
memcpy(vdp, vp, 4);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
else /* totmap = 4 */
|
||||
{
|
||||
multiframe_quality_enhance_block(16, qcurr, qprev, y_ptr,
|
||||
u_ptr, v_ptr,
|
||||
show->y_stride,
|
||||
show->uv_stride,
|
||||
yd_ptr, ud_ptr, vd_ptr,
|
||||
dest->y_stride,
|
||||
dest->uv_stride);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
vp8_copy_mem16x16(y_ptr, show->y_stride, yd_ptr, dest->y_stride);
|
||||
vp8_copy_mem8x8(u_ptr, show->uv_stride, ud_ptr, dest->uv_stride);
|
||||
vp8_copy_mem8x8(v_ptr, show->uv_stride, vd_ptr, dest->uv_stride);
|
||||
}
|
||||
y_ptr += 16;
|
||||
u_ptr += 8;
|
||||
v_ptr += 8;
|
||||
yd_ptr += 16;
|
||||
ud_ptr += 8;
|
||||
vd_ptr += 8;
|
||||
mode_info_context++; /* step to next MB */
|
||||
}
|
||||
|
||||
y_ptr += show->y_stride * 16 - 16 * cm->mb_cols;
|
||||
u_ptr += show->uv_stride * 8 - 8 * cm->mb_cols;
|
||||
v_ptr += show->uv_stride * 8 - 8 * cm->mb_cols;
|
||||
yd_ptr += dest->y_stride * 16 - 16 * cm->mb_cols;
|
||||
ud_ptr += dest->uv_stride * 8 - 8 * cm->mb_cols;
|
||||
vd_ptr += dest->uv_stride * 8 - 8 * cm->mb_cols;
|
||||
|
||||
mode_info_context++; /* Skip border mb */
|
||||
}
|
||||
}
|
||||
@@ -1,33 +0,0 @@
|
||||
/*
|
||||
* Copyright (c) 2012 The WebM project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
|
||||
#include "vpx_config.h"
|
||||
#include "vp8_rtcd.h"
|
||||
#include "vpx_mem/vpx_mem.h"
|
||||
|
||||
#if HAVE_DSPR2
|
||||
void vp8_dequant_idct_add_dspr2(short *input, short *dq,
|
||||
unsigned char *dest, int stride)
|
||||
{
|
||||
int i;
|
||||
|
||||
for (i = 0; i < 16; i++)
|
||||
{
|
||||
input[i] = dq[i] * input[i];
|
||||
}
|
||||
|
||||
vp8_short_idct4x4llm_dspr2(input, dest, stride, dest, stride);
|
||||
|
||||
memset(input, 0, 32);
|
||||
|
||||
}
|
||||
|
||||
#endif
|
||||
File diff suppressed because it is too large
@@ -1,88 +0,0 @@
|
||||
/*
|
||||
* Copyright (c) 2012 The WebM project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#include "vpx_config.h"
|
||||
#include "vp8_rtcd.h"
|
||||
|
||||
#if HAVE_DSPR2
|
||||
|
||||
void vp8_dequant_idct_add_y_block_dspr2
|
||||
(short *q, short *dq,
|
||||
unsigned char *dst, int stride, char *eobs)
|
||||
{
|
||||
int i, j;
|
||||
|
||||
for (i = 0; i < 4; i++)
|
||||
{
|
||||
for (j = 0; j < 4; j++)
|
||||
{
|
||||
if (*eobs++ > 1)
|
||||
vp8_dequant_idct_add_dspr2(q, dq, dst, stride);
|
||||
else
|
||||
{
|
||||
vp8_dc_only_idct_add_dspr2(q[0]*dq[0], dst, stride, dst, stride);
|
||||
((int *)q)[0] = 0;
|
||||
}
|
||||
|
||||
q += 16;
|
||||
dst += 4;
|
||||
}
|
||||
|
||||
dst += 4 * stride - 16;
|
||||
}
|
||||
}
|
||||
|
||||
void vp8_dequant_idct_add_uv_block_dspr2
|
||||
(short *q, short *dq,
|
||||
unsigned char *dstu, unsigned char *dstv, int stride, char *eobs)
|
||||
{
|
||||
int i, j;
|
||||
|
||||
for (i = 0; i < 2; i++)
|
||||
{
|
||||
for (j = 0; j < 2; j++)
|
||||
{
|
||||
if (*eobs++ > 1)
|
||||
vp8_dequant_idct_add_dspr2(q, dq, dstu, stride);
|
||||
else
|
||||
{
|
||||
vp8_dc_only_idct_add_dspr2(q[0]*dq[0], dstu, stride, dstu, stride);
|
||||
((int *)q)[0] = 0;
|
||||
}
|
||||
|
||||
q += 16;
|
||||
dstu += 4;
|
||||
}
|
||||
|
||||
dstu += 4 * stride - 8;
|
||||
}
|
||||
|
||||
for (i = 0; i < 2; i++)
|
||||
{
|
||||
for (j = 0; j < 2; j++)
|
||||
{
|
||||
if (*eobs++ > 1)
|
||||
vp8_dequant_idct_add_dspr2(q, dq, dstv, stride);
|
||||
else
|
||||
{
|
||||
vp8_dc_only_idct_add_dspr2(q[0]*dq[0], dstv, stride, dstv, stride);
|
||||
((int *)q)[0] = 0;
|
||||
}
|
||||
|
||||
q += 16;
|
||||
dstv += 4;
|
||||
}
|
||||
|
||||
dstv += 4 * stride - 8;
|
||||
}
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
@@ -1,369 +0,0 @@
|
||||
/*
|
||||
* Copyright (c) 2012 The WebM project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#include "vp8_rtcd.h"
|
||||
|
||||
#if HAVE_DSPR2
|
||||
#define CROP_WIDTH 256
|
||||
|
||||
/******************************************************************************
|
||||
* Notes:
|
||||
*
|
||||
* This implementation makes use of 16 bit fixed point version of two multiply
|
||||
* constants:
|
||||
* 1. sqrt(2) * cos (pi/8)
|
||||
* 2. sqrt(2) * sin (pi/8)
|
||||
* Since the first constant is bigger than 1, to maintain the same 16 bit
|
||||
* fixed point precision as the second one, we use a trick of
|
||||
* x * a = x + x*(a-1)
|
||||
* so
|
||||
* x * sqrt(2) * cos (pi/8) = x + x * (sqrt(2) *cos(pi/8)-1).
|
||||
****************************************************************************/
|
||||
extern unsigned char ff_cropTbl[256 + 2 * CROP_WIDTH];
|
||||
static const int cospi8sqrt2minus1 = 20091;
|
||||
static const int sinpi8sqrt2 = 35468;
|
||||
|
||||
inline void prefetch_load_short(short *src)
|
||||
{
|
||||
__asm__ __volatile__ (
|
||||
"pref 0, 0(%[src]) \n\t"
|
||||
:
|
||||
: [src] "r" (src)
|
||||
);
|
||||
}
|
||||
|
||||
void vp8_short_idct4x4llm_dspr2(short *input, unsigned char *pred_ptr,
|
||||
int pred_stride, unsigned char *dst_ptr,
|
||||
int dst_stride)
|
||||
{
|
||||
int r, c;
|
||||
int a1, b1, c1, d1;
|
||||
short output[16];
|
||||
short *ip = input;
|
||||
short *op = output;
|
||||
int temp1, temp2;
|
||||
int shortpitch = 4;
|
||||
|
||||
int c2, d2;
|
||||
int temp3, temp4;
|
||||
unsigned char *cm = ff_cropTbl + CROP_WIDTH;
|
||||
|
||||
/* prepare data for load */
|
||||
prefetch_load_short(ip + 8);
|
||||
|
||||
/* first loop is unrolled */
|
||||
a1 = ip[0] + ip[8];
|
||||
b1 = ip[0] - ip[8];
|
||||
|
||||
temp1 = (ip[4] * sinpi8sqrt2) >> 16;
|
||||
temp2 = ip[12] + ((ip[12] * cospi8sqrt2minus1) >> 16);
|
||||
c1 = temp1 - temp2;
|
||||
|
||||
temp1 = ip[4] + ((ip[4] * cospi8sqrt2minus1) >> 16);
|
||||
temp2 = (ip[12] * sinpi8sqrt2) >> 16;
|
||||
d1 = temp1 + temp2;
|
||||
|
||||
temp3 = (ip[5] * sinpi8sqrt2) >> 16;
|
||||
temp4 = ip[13] + ((ip[13] * cospi8sqrt2minus1) >> 16);
|
||||
c2 = temp3 - temp4;
|
||||
|
||||
temp3 = ip[5] + ((ip[5] * cospi8sqrt2minus1) >> 16);
|
||||
temp4 = (ip[13] * sinpi8sqrt2) >> 16;
|
||||
d2 = temp3 + temp4;
|
||||
|
||||
op[0] = a1 + d1;
|
||||
op[12] = a1 - d1;
|
||||
op[4] = b1 + c1;
|
||||
op[8] = b1 - c1;
|
||||
|
||||
a1 = ip[1] + ip[9];
|
||||
b1 = ip[1] - ip[9];
|
||||
|
||||
op[1] = a1 + d2;
|
||||
op[13] = a1 - d2;
|
||||
op[5] = b1 + c2;
|
||||
op[9] = b1 - c2;
|
||||
|
||||
a1 = ip[2] + ip[10];
|
||||
b1 = ip[2] - ip[10];
|
||||
|
||||
temp1 = (ip[6] * sinpi8sqrt2) >> 16;
|
||||
temp2 = ip[14] + ((ip[14] * cospi8sqrt2minus1) >> 16);
|
||||
c1 = temp1 - temp2;
|
||||
|
||||
temp1 = ip[6] + ((ip[6] * cospi8sqrt2minus1) >> 16);
|
||||
temp2 = (ip[14] * sinpi8sqrt2) >> 16;
|
||||
d1 = temp1 + temp2;
|
||||
|
||||
temp3 = (ip[7] * sinpi8sqrt2) >> 16;
|
||||
temp4 = ip[15] + ((ip[15] * cospi8sqrt2minus1) >> 16);
|
||||
c2 = temp3 - temp4;
|
||||
|
||||
temp3 = ip[7] + ((ip[7] * cospi8sqrt2minus1) >> 16);
|
||||
temp4 = (ip[15] * sinpi8sqrt2) >> 16;
|
||||
d2 = temp3 + temp4;
|
||||
|
||||
op[2] = a1 + d1;
|
||||
op[14] = a1 - d1;
|
||||
op[6] = b1 + c1;
|
||||
op[10] = b1 - c1;
|
||||
|
||||
a1 = ip[3] + ip[11];
|
||||
b1 = ip[3] - ip[11];
|
||||
|
||||
op[3] = a1 + d2;
|
||||
op[15] = a1 - d2;
|
||||
op[7] = b1 + c2;
|
||||
op[11] = b1 - c2;
|
||||
|
||||
ip = output;
|
||||
|
||||
/* prepare data for load */
|
||||
prefetch_load_short(ip + shortpitch);
|
||||
|
||||
/* second loop is unrolled */
|
||||
a1 = ip[0] + ip[2];
|
||||
b1 = ip[0] - ip[2];
|
||||
|
||||
temp1 = (ip[1] * sinpi8sqrt2) >> 16;
|
||||
temp2 = ip[3] + ((ip[3] * cospi8sqrt2minus1) >> 16);
|
||||
c1 = temp1 - temp2;
|
||||
|
||||
temp1 = ip[1] + ((ip[1] * cospi8sqrt2minus1) >> 16);
|
||||
temp2 = (ip[3] * sinpi8sqrt2) >> 16;
|
||||
d1 = temp1 + temp2;
|
||||
|
||||
temp3 = (ip[5] * sinpi8sqrt2) >> 16;
|
||||
temp4 = ip[7] + ((ip[7] * cospi8sqrt2minus1) >> 16);
|
||||
c2 = temp3 - temp4;
|
||||
|
||||
temp3 = ip[5] + ((ip[5] * cospi8sqrt2minus1) >> 16);
|
||||
temp4 = (ip[7] * sinpi8sqrt2) >> 16;
|
||||
d2 = temp3 + temp4;
|
||||
|
||||
op[0] = (a1 + d1 + 4) >> 3;
|
||||
op[3] = (a1 - d1 + 4) >> 3;
|
||||
op[1] = (b1 + c1 + 4) >> 3;
|
||||
op[2] = (b1 - c1 + 4) >> 3;
|
||||
|
||||
a1 = ip[4] + ip[6];
|
||||
b1 = ip[4] - ip[6];
|
||||
|
||||
op[4] = (a1 + d2 + 4) >> 3;
|
||||
op[7] = (a1 - d2 + 4) >> 3;
|
||||
op[5] = (b1 + c2 + 4) >> 3;
|
||||
op[6] = (b1 - c2 + 4) >> 3;
|
||||
|
||||
a1 = ip[8] + ip[10];
|
||||
b1 = ip[8] - ip[10];
|
||||
|
||||
temp1 = (ip[9] * sinpi8sqrt2) >> 16;
|
||||
temp2 = ip[11] + ((ip[11] * cospi8sqrt2minus1) >> 16);
|
||||
c1 = temp1 - temp2;
|
||||
|
||||
temp1 = ip[9] + ((ip[9] * cospi8sqrt2minus1) >> 16);
|
||||
temp2 = (ip[11] * sinpi8sqrt2) >> 16;
|
||||
d1 = temp1 + temp2;
|
||||
|
||||
temp3 = (ip[13] * sinpi8sqrt2) >> 16;
|
||||
temp4 = ip[15] + ((ip[15] * cospi8sqrt2minus1) >> 16);
|
||||
c2 = temp3 - temp4;
|
||||
|
||||
temp3 = ip[13] + ((ip[13] * cospi8sqrt2minus1) >> 16);
|
||||
temp4 = (ip[15] * sinpi8sqrt2) >> 16;
|
||||
d2 = temp3 + temp4;
|
||||
|
||||
op[8] = (a1 + d1 + 4) >> 3;
|
||||
op[11] = (a1 - d1 + 4) >> 3;
|
||||
op[9] = (b1 + c1 + 4) >> 3;
|
||||
op[10] = (b1 - c1 + 4) >> 3;
|
||||
|
||||
a1 = ip[12] + ip[14];
|
||||
b1 = ip[12] - ip[14];
|
||||
|
||||
op[12] = (a1 + d2 + 4) >> 3;
|
||||
op[15] = (a1 - d2 + 4) >> 3;
|
||||
op[13] = (b1 + c2 + 4) >> 3;
|
||||
op[14] = (b1 - c2 + 4) >> 3;
|
||||
|
||||
ip = output;
|
||||
|
||||
for (r = 0; r < 4; r++)
|
||||
{
|
||||
for (c = 0; c < 4; c++)
|
||||
{
|
||||
short a = ip[c] + pred_ptr[c] ;
|
||||
dst_ptr[c] = cm[a] ;
|
||||
}
|
||||
|
||||
ip += 4;
|
||||
dst_ptr += dst_stride;
|
||||
pred_ptr += pred_stride;
|
||||
}
|
||||
}
|
||||
|
||||
void vp8_dc_only_idct_add_dspr2(short input_dc, unsigned char *pred_ptr, int pred_stride, unsigned char *dst_ptr, int dst_stride)
|
||||
{
|
||||
int a1;
|
||||
int i, absa1;
|
||||
int t2, vector_a1, vector_a;
|
||||
|
||||
/* a1 = ((input_dc + 4) >> 3); */
|
||||
__asm__ __volatile__ (
|
||||
"addi %[a1], %[input_dc], 4 \n\t"
|
||||
"sra %[a1], %[a1], 3 \n\t"
|
||||
: [a1] "=r" (a1)
|
||||
: [input_dc] "r" (input_dc)
|
||||
);
|
||||
|
||||
if (a1 < 0)
|
||||
{
|
||||
/* use quad-byte
|
||||
* input and output memory are four byte aligned
|
||||
*/
|
||||
__asm__ __volatile__ (
|
||||
"abs %[absa1], %[a1] \n\t"
|
||||
"replv.qb %[vector_a1], %[absa1] \n\t"
|
||||
: [absa1] "=r" (absa1), [vector_a1] "=r" (vector_a1)
|
||||
: [a1] "r" (a1)
|
||||
);
|
||||
|
||||
/* use (a1 - predptr[c]) instead a1 + predptr[c] */
|
||||
for (i = 4; i--;)
|
||||
{
|
||||
__asm__ __volatile__ (
|
||||
"lw %[t2], 0(%[pred_ptr]) \n\t"
|
||||
"add %[pred_ptr], %[pred_ptr], %[pred_stride] \n\t"
|
||||
"subu_s.qb %[vector_a], %[t2], %[vector_a1] \n\t"
|
||||
"sw %[vector_a], 0(%[dst_ptr]) \n\t"
|
||||
"add %[dst_ptr], %[dst_ptr], %[dst_stride] \n\t"
|
||||
: [t2] "=&r" (t2), [vector_a] "=&r" (vector_a),
|
||||
[dst_ptr] "+&r" (dst_ptr), [pred_ptr] "+&r" (pred_ptr)
|
||||
: [dst_stride] "r" (dst_stride), [pred_stride] "r" (pred_stride), [vector_a1] "r" (vector_a1)
|
||||
);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
/* use quad-byte
|
||||
* input and output memory are four byte aligned
|
||||
*/
|
||||
__asm__ __volatile__ (
|
||||
"replv.qb %[vector_a1], %[a1] \n\t"
|
||||
: [vector_a1] "=r" (vector_a1)
|
||||
: [a1] "r" (a1)
|
||||
);
|
||||
|
||||
for (i = 4; i--;)
|
||||
{
|
||||
__asm__ __volatile__ (
|
||||
"lw %[t2], 0(%[pred_ptr]) \n\t"
|
||||
"add %[pred_ptr], %[pred_ptr], %[pred_stride] \n\t"
|
||||
"addu_s.qb %[vector_a], %[vector_a1], %[t2] \n\t"
|
||||
"sw %[vector_a], 0(%[dst_ptr]) \n\t"
|
||||
"add %[dst_ptr], %[dst_ptr], %[dst_stride] \n\t"
|
||||
: [t2] "=&r" (t2), [vector_a] "=&r" (vector_a),
|
||||
[dst_ptr] "+&r" (dst_ptr), [pred_ptr] "+&r" (pred_ptr)
|
||||
: [dst_stride] "r" (dst_stride), [pred_stride] "r" (pred_stride), [vector_a1] "r" (vector_a1)
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
void vp8_short_inv_walsh4x4_dspr2(short *input, short *mb_dqcoeff)
|
||||
{
|
||||
short output[16];
|
||||
int i;
|
||||
int a1, b1, c1, d1;
|
||||
int a2, b2, c2, d2;
|
||||
short *ip = input;
|
||||
short *op = output;
|
||||
|
||||
prefetch_load_short(ip);
|
||||
|
||||
for (i = 4; i--;)
|
||||
{
|
||||
a1 = ip[0] + ip[12];
|
||||
b1 = ip[4] + ip[8];
|
||||
c1 = ip[4] - ip[8];
|
||||
d1 = ip[0] - ip[12];
|
||||
|
||||
op[0] = a1 + b1;
|
||||
op[4] = c1 + d1;
|
||||
op[8] = a1 - b1;
|
||||
op[12] = d1 - c1;
|
||||
|
||||
ip++;
|
||||
op++;
|
||||
}
|
||||
|
||||
ip = output;
|
||||
op = output;
|
||||
|
||||
prefetch_load_short(ip);
|
||||
|
||||
for (i = 4; i--;)
|
||||
{
|
||||
a1 = ip[0] + ip[3] + 3;
|
||||
b1 = ip[1] + ip[2];
|
||||
c1 = ip[1] - ip[2];
|
||||
d1 = ip[0] - ip[3] + 3;
|
||||
|
||||
a2 = a1 + b1;
|
||||
b2 = d1 + c1;
|
||||
c2 = a1 - b1;
|
||||
d2 = d1 - c1;
|
||||
|
||||
op[0] = a2 >> 3;
|
||||
op[1] = b2 >> 3;
|
||||
op[2] = c2 >> 3;
|
||||
op[3] = d2 >> 3;
|
||||
|
||||
ip += 4;
|
||||
op += 4;
|
||||
}
|
||||
|
||||
for (i = 0; i < 16; i++)
|
||||
{
|
||||
mb_dqcoeff[i * 16] = output[i];
|
||||
}
|
||||
}
|
||||
|
||||
void vp8_short_inv_walsh4x4_1_dspr2(short *input, short *mb_dqcoeff)
|
||||
{
|
||||
int a1;
|
||||
|
||||
a1 = ((input[0] + 3) >> 3);
|
||||
|
||||
__asm__ __volatile__ (
|
||||
"sh %[a1], 0(%[mb_dqcoeff]) \n\t"
|
||||
"sh %[a1], 32(%[mb_dqcoeff]) \n\t"
|
||||
"sh %[a1], 64(%[mb_dqcoeff]) \n\t"
|
||||
"sh %[a1], 96(%[mb_dqcoeff]) \n\t"
|
||||
"sh %[a1], 128(%[mb_dqcoeff]) \n\t"
|
||||
"sh %[a1], 160(%[mb_dqcoeff]) \n\t"
|
||||
"sh %[a1], 192(%[mb_dqcoeff]) \n\t"
|
||||
"sh %[a1], 224(%[mb_dqcoeff]) \n\t"
|
||||
"sh %[a1], 256(%[mb_dqcoeff]) \n\t"
|
||||
"sh %[a1], 288(%[mb_dqcoeff]) \n\t"
|
||||
"sh %[a1], 320(%[mb_dqcoeff]) \n\t"
|
||||
"sh %[a1], 352(%[mb_dqcoeff]) \n\t"
|
||||
"sh %[a1], 384(%[mb_dqcoeff]) \n\t"
|
||||
"sh %[a1], 416(%[mb_dqcoeff]) \n\t"
|
||||
"sh %[a1], 448(%[mb_dqcoeff]) \n\t"
|
||||
"sh %[a1], 480(%[mb_dqcoeff]) \n\t"
|
||||
|
||||
:
|
||||
: [a1] "r" (a1), [mb_dqcoeff] "r" (mb_dqcoeff)
|
||||
);
|
||||
}
|
||||
|
||||
#endif
|
||||
@@ -1,121 +0,0 @@
|
||||
/*
|
||||
* Copyright (c) 2012 The WebM project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
|
||||
#include "vpx_config.h"
|
||||
#include "vp8_rtcd.h"
|
||||
#include "vpx/vpx_integer.h"
|
||||
|
||||
#if HAVE_DSPR2
|
||||
inline void prefetch_load_int(unsigned char *src)
|
||||
{
|
||||
__asm__ __volatile__ (
|
||||
"pref 0, 0(%[src]) \n\t"
|
||||
:
|
||||
: [src] "r" (src)
|
||||
);
|
||||
}
|
||||
|
||||
|
||||
__inline void vp8_copy_mem16x16_dspr2(
|
||||
unsigned char *RESTRICT src,
|
||||
int src_stride,
|
||||
unsigned char *RESTRICT dst,
|
||||
int dst_stride)
|
||||
{
|
||||
int r;
|
||||
unsigned int a0, a1, a2, a3;
|
||||
|
||||
for (r = 16; r--;)
|
||||
{
|
||||
/* load src data in cache memory */
|
||||
prefetch_load_int(src + src_stride);
|
||||
|
||||
/* use unaligned memory load and store */
|
||||
__asm__ __volatile__ (
|
||||
"ulw %[a0], 0(%[src]) \n\t"
|
||||
"ulw %[a1], 4(%[src]) \n\t"
|
||||
"ulw %[a2], 8(%[src]) \n\t"
|
||||
"ulw %[a3], 12(%[src]) \n\t"
|
||||
"sw %[a0], 0(%[dst]) \n\t"
|
||||
"sw %[a1], 4(%[dst]) \n\t"
|
||||
"sw %[a2], 8(%[dst]) \n\t"
|
||||
"sw %[a3], 12(%[dst]) \n\t"
|
||||
: [a0] "=&r" (a0), [a1] "=&r" (a1),
|
||||
[a2] "=&r" (a2), [a3] "=&r" (a3)
|
||||
: [src] "r" (src), [dst] "r" (dst)
|
||||
);
|
||||
|
||||
src += src_stride;
|
||||
dst += dst_stride;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
__inline void vp8_copy_mem8x8_dspr2(
|
||||
unsigned char *RESTRICT src,
|
||||
int src_stride,
|
||||
unsigned char *RESTRICT dst,
|
||||
int dst_stride)
|
||||
{
|
||||
int r;
|
||||
unsigned int a0, a1;
|
||||
|
||||
/* load src data in cache memory */
|
||||
prefetch_load_int(src + src_stride);
|
||||
|
||||
for (r = 8; r--;)
|
||||
{
|
||||
/* use unaligned memory load and store */
|
||||
__asm__ __volatile__ (
|
||||
"ulw %[a0], 0(%[src]) \n\t"
|
||||
"ulw %[a1], 4(%[src]) \n\t"
|
||||
"sw %[a0], 0(%[dst]) \n\t"
|
||||
"sw %[a1], 4(%[dst]) \n\t"
|
||||
: [a0] "=&r" (a0), [a1] "=&r" (a1)
|
||||
: [src] "r" (src), [dst] "r" (dst)
|
||||
);
|
||||
|
||||
src += src_stride;
|
||||
dst += dst_stride;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
__inline void vp8_copy_mem8x4_dspr2(
|
||||
unsigned char *RESTRICT src,
|
||||
int src_stride,
|
||||
unsigned char *RESTRICT dst,
|
||||
int dst_stride)
|
||||
{
|
||||
int r;
|
||||
unsigned int a0, a1;
|
||||
|
||||
/* load src data in cache memory */
|
||||
prefetch_load_int(src + src_stride);
|
||||
|
||||
for (r = 4; r--;)
|
||||
{
|
||||
/* use unaligned memory load and store */
|
||||
__asm__ __volatile__ (
|
||||
"ulw %[a0], 0(%[src]) \n\t"
|
||||
"ulw %[a1], 4(%[src]) \n\t"
|
||||
"sw %[a0], 0(%[dst]) \n\t"
|
||||
"sw %[a1], 4(%[dst]) \n\t"
|
||||
: [a0] "=&r" (a0), [a1] "=&r" (a1)
|
||||
: [src] "r" (src), [dst] "r" (dst)
|
||||
);
|
||||
|
||||
src += src_stride;
|
||||
dst += dst_stride;
|
||||
}
|
||||
}
|
||||
|
||||
#endif
|
||||
File diff suppressed because it is too large
@@ -1,911 +0,0 @@
|
||||
/*
|
||||
* Copyright (c) 2015 The WebM project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#include "./vp8_rtcd.h"
#include "vpx_ports/mem.h"
#include "vp8/common/filter.h"
#include "vp8/common/mips/msa/vp8_macros_msa.h"

DECLARE_ALIGNED(16, static const int8_t, vp8_bilinear_filters_msa[7][2]) =
{
    { 112, 16 },
    { 96, 32 },
    { 80, 48 },
    { 64, 64 },
    { 48, 80 },
    { 32, 96 },
    { 16, 112 }
};
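An illustrative aside on the table above (not from the removed file): each pair of taps sums to 128, so a bilinear sample is a two-tap dot product followed by a rounding shift of 7; the shift value is assumed here to match VP8_FILTER_SHIFT from vp8/common/filter.h, which the SRARI_H* calls below use.

#include <stdio.h>

/* Two-tap bilinear interpolation with taps summing to 128 (1 << 7). */
static unsigned char bilinear(unsigned char a, unsigned char b,
                              int tap0, int tap1)
{
    return (unsigned char)((a * tap0 + b * tap1 + 64) >> 7);
}

int main(void)
{
    /* The { 96, 32 } row weights the left pixel by 96/128 = 0.75. */
    printf("%d\n", bilinear(100, 140, 96, 32)); /* (9600 + 4480 + 64) >> 7 = 110 */
    return 0;
}
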
|
||||
|
||||
static const uint8_t vp8_mc_filt_mask_arr[16 * 3] =
|
||||
{
|
||||
/* 8 width cases */
|
||||
0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8,
|
||||
/* 4 width cases */
|
||||
0, 1, 1, 2, 2, 3, 3, 4, 16, 17, 17, 18, 18, 19, 19, 20,
|
||||
/* 4 width cases */
|
||||
8, 9, 9, 10, 10, 11, 11, 12, 24, 25, 25, 26, 26, 27, 27, 28
|
||||
};

static void common_hz_2t_4x4_msa(uint8_t *RESTRICT src, int32_t src_stride,
                                 uint8_t *RESTRICT dst, int32_t dst_stride,
                                 const int8_t *filter)
{
    v16i8 src0, src1, src2, src3, mask;
    v16u8 filt0, vec0, vec1, res0, res1;
    v8u16 vec2, vec3, filt;

    mask = LD_SB(&vp8_mc_filt_mask_arr[16]);

    filt = LD_UH(filter);
    filt0 = (v16u8)__msa_splati_h((v8i16)filt, 0);

    LD_SB4(src, src_stride, src0, src1, src2, src3);
    VSHF_B2_UB(src0, src1, src2, src3, mask, mask, vec0, vec1);
    DOTP_UB2_UH(vec0, vec1, filt0, filt0, vec2, vec3);
    SRARI_H2_UH(vec2, vec3, VP8_FILTER_SHIFT);
    PCKEV_B2_UB(vec2, vec2, vec3, vec3, res0, res1);
    ST4x4_UB(res0, res1, 0, 1, 0, 1, dst, dst_stride);
}

static void common_hz_2t_4x8_msa(uint8_t *RESTRICT src, int32_t src_stride,
                                 uint8_t *RESTRICT dst, int32_t dst_stride,
                                 const int8_t *filter)
{
    v16u8 vec0, vec1, vec2, vec3, filt0;
    v16i8 src0, src1, src2, src3, src4, src5, src6, src7, mask;
    v16i8 res0, res1, res2, res3;
    v8u16 vec4, vec5, vec6, vec7, filt;

    mask = LD_SB(&vp8_mc_filt_mask_arr[16]);

    filt = LD_UH(filter);
    filt0 = (v16u8)__msa_splati_h((v8i16)filt, 0);

    LD_SB8(src, src_stride, src0, src1, src2, src3, src4, src5, src6, src7);
    VSHF_B2_UB(src0, src1, src2, src3, mask, mask, vec0, vec1);
    VSHF_B2_UB(src4, src5, src6, src7, mask, mask, vec2, vec3);
    DOTP_UB4_UH(vec0, vec1, vec2, vec3, filt0, filt0, filt0, filt0,
                vec4, vec5, vec6, vec7);
    SRARI_H4_UH(vec4, vec5, vec6, vec7, VP8_FILTER_SHIFT);
    PCKEV_B4_SB(vec4, vec4, vec5, vec5, vec6, vec6, vec7, vec7,
                res0, res1, res2, res3);
    ST4x4_UB(res0, res1, 0, 1, 0, 1, dst, dst_stride);
    dst += (4 * dst_stride);
    ST4x4_UB(res2, res3, 0, 1, 0, 1, dst, dst_stride);
}

static void common_hz_2t_4w_msa(uint8_t *RESTRICT src, int32_t src_stride,
                                uint8_t *RESTRICT dst, int32_t dst_stride,
                                const int8_t *filter, int32_t height)
{
    if (4 == height)
    {
        common_hz_2t_4x4_msa(src, src_stride, dst, dst_stride, filter);
    }
    else if (8 == height)
    {
        common_hz_2t_4x8_msa(src, src_stride, dst, dst_stride, filter);
    }
}
|
||||
|
||||
static void common_hz_2t_8x4_msa(uint8_t *RESTRICT src, int32_t src_stride,
|
||||
uint8_t *RESTRICT dst, int32_t dst_stride,
|
||||
const int8_t *filter)
|
||||
{
|
||||
v16u8 filt0;
|
||||
v16i8 src0, src1, src2, src3, mask;
|
||||
v8u16 vec0, vec1, vec2, vec3, filt;
|
||||
|
||||
mask = LD_SB(&vp8_mc_filt_mask_arr[0]);
|
||||
|
||||
filt = LD_UH(filter);
|
||||
filt0 = (v16u8)__msa_splati_h((v8i16)filt, 0);
|
||||
|
||||
LD_SB4(src, src_stride, src0, src1, src2, src3);
|
||||
VSHF_B2_UH(src0, src0, src1, src1, mask, mask, vec0, vec1);
|
||||
VSHF_B2_UH(src2, src2, src3, src3, mask, mask, vec2, vec3);
|
||||
DOTP_UB4_UH(vec0, vec1, vec2, vec3, filt0, filt0, filt0, filt0,
|
||||
vec0, vec1, vec2, vec3);
|
||||
SRARI_H4_UH(vec0, vec1, vec2, vec3, VP8_FILTER_SHIFT);
|
||||
PCKEV_B2_SB(vec1, vec0, vec3, vec2, src0, src1);
|
||||
ST8x4_UB(src0, src1, dst, dst_stride);
|
||||
}
|
||||
|
||||
static void common_hz_2t_8x8mult_msa(uint8_t *RESTRICT src, int32_t src_stride,
|
||||
uint8_t *RESTRICT dst, int32_t dst_stride,
|
||||
const int8_t *filter, int32_t height)
|
||||
{
|
||||
v16u8 filt0;
|
||||
v16i8 src0, src1, src2, src3, mask, out0, out1;
|
||||
v8u16 vec0, vec1, vec2, vec3, filt;
|
||||
|
||||
mask = LD_SB(&vp8_mc_filt_mask_arr[0]);
|
||||
|
||||
filt = LD_UH(filter);
|
||||
filt0 = (v16u8)__msa_splati_h((v8i16)filt, 0);
|
||||
|
||||
LD_SB4(src, src_stride, src0, src1, src2, src3);
|
||||
src += (4 * src_stride);
|
||||
|
||||
VSHF_B2_UH(src0, src0, src1, src1, mask, mask, vec0, vec1);
|
||||
VSHF_B2_UH(src2, src2, src3, src3, mask, mask, vec2, vec3);
|
||||
DOTP_UB4_UH(vec0, vec1, vec2, vec3, filt0, filt0, filt0, filt0,
|
||||
vec0, vec1, vec2, vec3);
|
||||
SRARI_H4_UH(vec0, vec1, vec2, vec3, VP8_FILTER_SHIFT);
|
||||
|
||||
LD_SB4(src, src_stride, src0, src1, src2, src3);
|
||||
src += (4 * src_stride);
|
||||
|
||||
PCKEV_B2_SB(vec1, vec0, vec3, vec2, out0, out1);
|
||||
ST8x4_UB(out0, out1, dst, dst_stride);
|
||||
dst += (4 * dst_stride);
|
||||
|
||||
VSHF_B2_UH(src0, src0, src1, src1, mask, mask, vec0, vec1);
|
||||
VSHF_B2_UH(src2, src2, src3, src3, mask, mask, vec2, vec3);
|
||||
DOTP_UB4_UH(vec0, vec1, vec2, vec3, filt0, filt0, filt0, filt0,
|
||||
vec0, vec1, vec2, vec3);
|
||||
SRARI_H4_UH(vec0, vec1, vec2, vec3, VP8_FILTER_SHIFT);
|
||||
PCKEV_B2_SB(vec1, vec0, vec3, vec2, out0, out1);
|
||||
ST8x4_UB(out0, out1, dst, dst_stride);
|
||||
dst += (4 * dst_stride);
|
||||
|
||||
if (16 == height)
|
||||
{
|
||||
LD_SB4(src, src_stride, src0, src1, src2, src3);
|
||||
src += (4 * src_stride);
|
||||
|
||||
VSHF_B2_UH(src0, src0, src1, src1, mask, mask, vec0, vec1);
|
||||
VSHF_B2_UH(src2, src2, src3, src3, mask, mask, vec2, vec3);
|
||||
DOTP_UB4_UH(vec0, vec1, vec2, vec3, filt0, filt0, filt0, filt0,
|
||||
vec0, vec1, vec2, vec3);
|
||||
SRARI_H4_UH(vec0, vec1, vec2, vec3, VP8_FILTER_SHIFT);
|
||||
LD_SB4(src, src_stride, src0, src1, src2, src3);
|
||||
src += (4 * src_stride);
|
||||
|
||||
PCKEV_B2_SB(vec1, vec0, vec3, vec2, out0, out1);
|
||||
ST8x4_UB(out0, out1, dst, dst_stride);
|
||||
|
||||
VSHF_B2_UH(src0, src0, src1, src1, mask, mask, vec0, vec1);
|
||||
VSHF_B2_UH(src2, src2, src3, src3, mask, mask, vec2, vec3);
|
||||
DOTP_UB4_UH(vec0, vec1, vec2, vec3, filt0, filt0, filt0, filt0,
|
||||
vec0, vec1, vec2, vec3);
|
||||
SRARI_H4_UH(vec0, vec1, vec2, vec3, VP8_FILTER_SHIFT);
|
||||
PCKEV_B2_SB(vec1, vec0, vec3, vec2, out0, out1);
|
||||
ST8x4_UB(out0, out1, dst + 4 * dst_stride, dst_stride);
|
||||
}
|
||||
}
|
||||
|
||||
static void common_hz_2t_8w_msa(uint8_t *RESTRICT src, int32_t src_stride,
|
||||
uint8_t *RESTRICT dst, int32_t dst_stride,
|
||||
const int8_t *filter, int32_t height)
|
||||
{
|
||||
if (4 == height)
|
||||
{
|
||||
common_hz_2t_8x4_msa(src, src_stride, dst, dst_stride, filter);
|
||||
}
|
||||
else
|
||||
{
|
||||
common_hz_2t_8x8mult_msa(src, src_stride, dst, dst_stride, filter, height);
|
||||
}
|
||||
}
|
||||
|
||||
static void common_hz_2t_16w_msa(uint8_t *RESTRICT src, int32_t src_stride,
|
||||
uint8_t *RESTRICT dst, int32_t dst_stride,
|
||||
const int8_t *filter, int32_t height)
|
||||
{
|
||||
uint32_t loop_cnt;
|
||||
v16i8 src0, src1, src2, src3, src4, src5, src6, src7, mask;
|
||||
v16u8 filt0, vec0, vec1, vec2, vec3, vec4, vec5, vec6, vec7;
|
||||
v8u16 out0, out1, out2, out3, out4, out5, out6, out7, filt;
|
||||
|
||||
mask = LD_SB(&vp8_mc_filt_mask_arr[0]);
|
||||
|
||||
loop_cnt = (height >> 2) - 1;
|
||||
|
||||
filt = LD_UH(filter);
|
||||
filt0 = (v16u8)__msa_splati_h((v8i16)filt, 0);
|
||||
|
||||
LD_SB4(src, src_stride, src0, src2, src4, src6);
|
||||
LD_SB4(src + 8, src_stride, src1, src3, src5, src7);
|
||||
src += (4 * src_stride);
|
||||
|
||||
VSHF_B2_UB(src0, src0, src1, src1, mask, mask, vec0, vec1);
|
||||
VSHF_B2_UB(src2, src2, src3, src3, mask, mask, vec2, vec3);
|
||||
VSHF_B2_UB(src4, src4, src5, src5, mask, mask, vec4, vec5);
|
||||
VSHF_B2_UB(src6, src6, src7, src7, mask, mask, vec6, vec7);
|
||||
DOTP_UB4_UH(vec0, vec1, vec2, vec3, filt0, filt0, filt0, filt0,
|
||||
out0, out1, out2, out3);
|
||||
DOTP_UB4_UH(vec4, vec5, vec6, vec7, filt0, filt0, filt0, filt0,
|
||||
out4, out5, out6, out7);
|
||||
SRARI_H4_UH(out0, out1, out2, out3, VP8_FILTER_SHIFT);
|
||||
SRARI_H4_UH(out4, out5, out6, out7, VP8_FILTER_SHIFT);
|
||||
PCKEV_ST_SB(out0, out1, dst);
|
||||
dst += dst_stride;
|
||||
PCKEV_ST_SB(out2, out3, dst);
|
||||
dst += dst_stride;
|
||||
PCKEV_ST_SB(out4, out5, dst);
|
||||
dst += dst_stride;
|
||||
PCKEV_ST_SB(out6, out7, dst);
|
||||
dst += dst_stride;
|
||||
|
||||
for (; loop_cnt--;)
|
||||
{
|
||||
LD_SB4(src, src_stride, src0, src2, src4, src6);
|
||||
LD_SB4(src + 8, src_stride, src1, src3, src5, src7);
|
||||
src += (4 * src_stride);
|
||||
|
||||
VSHF_B2_UB(src0, src0, src1, src1, mask, mask, vec0, vec1);
|
||||
VSHF_B2_UB(src2, src2, src3, src3, mask, mask, vec2, vec3);
|
||||
VSHF_B2_UB(src4, src4, src5, src5, mask, mask, vec4, vec5);
|
||||
VSHF_B2_UB(src6, src6, src7, src7, mask, mask, vec6, vec7);
|
||||
DOTP_UB4_UH(vec0, vec1, vec2, vec3, filt0, filt0, filt0, filt0,
|
||||
out0, out1, out2, out3);
|
||||
DOTP_UB4_UH(vec4, vec5, vec6, vec7, filt0, filt0, filt0, filt0,
|
||||
out4, out5, out6, out7);
|
||||
SRARI_H4_UH(out0, out1, out2, out3, VP8_FILTER_SHIFT);
|
||||
SRARI_H4_UH(out4, out5, out6, out7, VP8_FILTER_SHIFT);
|
||||
PCKEV_ST_SB(out0, out1, dst);
|
||||
dst += dst_stride;
|
||||
PCKEV_ST_SB(out2, out3, dst);
|
||||
dst += dst_stride;
|
||||
PCKEV_ST_SB(out4, out5, dst);
|
||||
dst += dst_stride;
|
||||
PCKEV_ST_SB(out6, out7, dst);
|
||||
dst += dst_stride;
|
||||
}
|
||||
}
|
||||
|
||||
static void common_vt_2t_4x4_msa(uint8_t *RESTRICT src, int32_t src_stride,
|
||||
uint8_t *RESTRICT dst, int32_t dst_stride,
|
||||
const int8_t *filter)
|
||||
{
|
||||
v16i8 src0, src1, src2, src3, src4;
|
||||
v16i8 src10_r, src32_r, src21_r, src43_r, src2110, src4332;
|
||||
v16u8 filt0;
|
||||
v8i16 filt;
|
||||
v8u16 tmp0, tmp1;
|
||||
|
||||
filt = LD_SH(filter);
|
||||
filt0 = (v16u8)__msa_splati_h(filt, 0);
|
||||
|
||||
LD_SB5(src, src_stride, src0, src1, src2, src3, src4);
|
||||
src += (5 * src_stride);
|
||||
|
||||
ILVR_B4_SB(src1, src0, src2, src1, src3, src2, src4, src3,
|
||||
src10_r, src21_r, src32_r, src43_r);
|
||||
ILVR_D2_SB(src21_r, src10_r, src43_r, src32_r, src2110, src4332);
|
||||
DOTP_UB2_UH(src2110, src4332, filt0, filt0, tmp0, tmp1);
|
||||
SRARI_H2_UH(tmp0, tmp1, VP8_FILTER_SHIFT);
|
||||
src2110 = __msa_pckev_b((v16i8)tmp1, (v16i8)tmp0);
|
||||
ST4x4_UB(src2110, src2110, 0, 1, 2, 3, dst, dst_stride);
|
||||
}
|
||||
|
||||
static void common_vt_2t_4x8_msa(uint8_t *RESTRICT src, int32_t src_stride,
|
||||
uint8_t *RESTRICT dst, int32_t dst_stride,
|
||||
const int8_t *filter)
|
||||
{
|
||||
v16i8 src0, src1, src2, src3, src4, src5, src6, src7, src8;
|
||||
v16i8 src10_r, src32_r, src54_r, src76_r, src21_r, src43_r;
|
||||
v16i8 src65_r, src87_r, src2110, src4332, src6554, src8776;
|
||||
v8u16 tmp0, tmp1, tmp2, tmp3;
|
||||
v16u8 filt0;
|
||||
v8i16 filt;
|
||||
|
||||
filt = LD_SH(filter);
|
||||
filt0 = (v16u8)__msa_splati_h(filt, 0);
|
||||
|
||||
LD_SB8(src, src_stride, src0, src1, src2, src3, src4, src5, src6, src7);
|
||||
src += (8 * src_stride);
|
||||
|
||||
src8 = LD_SB(src);
|
||||
src += src_stride;
|
||||
|
||||
ILVR_B4_SB(src1, src0, src2, src1, src3, src2, src4, src3, src10_r, src21_r,
|
||||
src32_r, src43_r);
|
||||
ILVR_B4_SB(src5, src4, src6, src5, src7, src6, src8, src7, src54_r, src65_r,
|
||||
src76_r, src87_r);
|
||||
ILVR_D4_SB(src21_r, src10_r, src43_r, src32_r, src65_r, src54_r,
|
||||
src87_r, src76_r, src2110, src4332, src6554, src8776);
|
||||
DOTP_UB4_UH(src2110, src4332, src6554, src8776, filt0, filt0, filt0, filt0,
|
||||
tmp0, tmp1, tmp2, tmp3);
|
||||
SRARI_H4_UH(tmp0, tmp1, tmp2, tmp3, VP8_FILTER_SHIFT);
|
||||
PCKEV_B2_SB(tmp1, tmp0, tmp3, tmp2, src2110, src4332);
|
||||
ST4x4_UB(src2110, src2110, 0, 1, 2, 3, dst, dst_stride);
|
||||
ST4x4_UB(src4332, src4332, 0, 1, 2, 3, dst + 4 * dst_stride, dst_stride);
|
||||
}
|
||||
|
||||
static void common_vt_2t_4w_msa(uint8_t *RESTRICT src, int32_t src_stride,
|
||||
uint8_t *RESTRICT dst, int32_t dst_stride,
|
||||
const int8_t *filter, int32_t height)
|
||||
{
|
||||
if (4 == height)
|
||||
{
|
||||
common_vt_2t_4x4_msa(src, src_stride, dst, dst_stride, filter);
|
||||
}
|
||||
else if (8 == height)
|
||||
{
|
||||
common_vt_2t_4x8_msa(src, src_stride, dst, dst_stride, filter);
|
||||
}
|
||||
}
|
||||
|
||||
static void common_vt_2t_8x4_msa(uint8_t *RESTRICT src, int32_t src_stride,
|
||||
uint8_t *RESTRICT dst, int32_t dst_stride,
|
||||
const int8_t *filter)
|
||||
{
|
||||
v16u8 src0, src1, src2, src3, src4, vec0, vec1, vec2, vec3, filt0;
|
||||
v16i8 out0, out1;
|
||||
v8u16 tmp0, tmp1, tmp2, tmp3;
|
||||
v8i16 filt;
|
||||
|
||||
filt = LD_SH(filter);
|
||||
filt0 = (v16u8)__msa_splati_h(filt, 0);
|
||||
|
||||
LD_UB5(src, src_stride, src0, src1, src2, src3, src4);
|
||||
ILVR_B2_UB(src1, src0, src2, src1, vec0, vec1);
|
||||
ILVR_B2_UB(src3, src2, src4, src3, vec2, vec3);
|
||||
DOTP_UB4_UH(vec0, vec1, vec2, vec3, filt0, filt0, filt0, filt0,
|
||||
tmp0, tmp1, tmp2, tmp3);
|
||||
SRARI_H4_UH(tmp0, tmp1, tmp2, tmp3, VP8_FILTER_SHIFT);
|
||||
PCKEV_B2_SB(tmp1, tmp0, tmp3, tmp2, out0, out1);
|
||||
ST8x4_UB(out0, out1, dst, dst_stride);
|
||||
}
|
||||
|
||||
static void common_vt_2t_8x8mult_msa(uint8_t *RESTRICT src, int32_t src_stride,
|
||||
uint8_t *RESTRICT dst, int32_t dst_stride,
|
||||
const int8_t *filter, int32_t height)
|
||||
{
|
||||
uint32_t loop_cnt;
|
||||
v16u8 src0, src1, src2, src3, src4, src5, src6, src7, src8;
|
||||
v16u8 vec0, vec1, vec2, vec3, vec4, vec5, vec6, vec7, filt0;
|
||||
v16i8 out0, out1;
|
||||
v8u16 tmp0, tmp1, tmp2, tmp3;
|
||||
v8i16 filt;
|
||||
|
||||
filt = LD_SH(filter);
|
||||
filt0 = (v16u8)__msa_splati_h(filt, 0);
|
||||
|
||||
src0 = LD_UB(src);
|
||||
src += src_stride;
|
||||
|
||||
for (loop_cnt = (height >> 3); loop_cnt--;)
|
||||
{
|
||||
LD_UB8(src, src_stride, src1, src2, src3, src4, src5, src6, src7, src8);
|
||||
src += (8 * src_stride);
|
||||
|
||||
ILVR_B4_UB(src1, src0, src2, src1, src3, src2, src4, src3,
|
||||
vec0, vec1, vec2, vec3);
|
||||
ILVR_B4_UB(src5, src4, src6, src5, src7, src6, src8, src7,
|
||||
vec4, vec5, vec6, vec7);
|
||||
DOTP_UB4_UH(vec0, vec1, vec2, vec3, filt0, filt0, filt0, filt0,
|
||||
tmp0, tmp1, tmp2, tmp3);
|
||||
SRARI_H4_UH(tmp0, tmp1, tmp2, tmp3, VP8_FILTER_SHIFT);
|
||||
PCKEV_B2_SB(tmp1, tmp0, tmp3, tmp2, out0, out1);
|
||||
ST8x4_UB(out0, out1, dst, dst_stride);
|
||||
dst += (4 * dst_stride);
|
||||
|
||||
DOTP_UB4_UH(vec4, vec5, vec6, vec7, filt0, filt0, filt0, filt0,
|
||||
tmp0, tmp1, tmp2, tmp3);
|
||||
SRARI_H4_UH(tmp0, tmp1, tmp2, tmp3, VP8_FILTER_SHIFT);
|
||||
PCKEV_B2_SB(tmp1, tmp0, tmp3, tmp2, out0, out1);
|
||||
ST8x4_UB(out0, out1, dst, dst_stride);
|
||||
dst += (4 * dst_stride);
|
||||
|
||||
src0 = src8;
|
||||
}
|
||||
}
|
||||
|
||||
static void common_vt_2t_8w_msa(uint8_t *RESTRICT src, int32_t src_stride,
|
||||
uint8_t *RESTRICT dst, int32_t dst_stride,
|
||||
const int8_t *filter, int32_t height)
|
||||
{
|
||||
if (4 == height)
|
||||
{
|
||||
common_vt_2t_8x4_msa(src, src_stride, dst, dst_stride, filter);
|
||||
}
|
||||
else
|
||||
{
|
||||
common_vt_2t_8x8mult_msa(src, src_stride, dst, dst_stride, filter,
|
||||
height);
|
||||
}
|
||||
}
|
||||
|
||||
static void common_vt_2t_16w_msa(uint8_t *RESTRICT src, int32_t src_stride,
|
||||
uint8_t *RESTRICT dst, int32_t dst_stride,
|
||||
const int8_t *filter, int32_t height)
|
||||
{
|
||||
uint32_t loop_cnt;
|
||||
v16u8 src0, src1, src2, src3, src4;
|
||||
v16u8 vec0, vec1, vec2, vec3, vec4, vec5, vec6, vec7, filt0;
|
||||
v8u16 tmp0, tmp1, tmp2, tmp3;
|
||||
v8i16 filt;
|
||||
|
||||
filt = LD_SH(filter);
|
||||
filt0 = (v16u8)__msa_splati_h(filt, 0);
|
||||
|
||||
src0 = LD_UB(src);
|
||||
src += src_stride;
|
||||
|
||||
for (loop_cnt = (height >> 2); loop_cnt--;)
|
||||
{
|
||||
LD_UB4(src, src_stride, src1, src2, src3, src4);
|
||||
src += (4 * src_stride);
|
||||
|
||||
ILVR_B2_UB(src1, src0, src2, src1, vec0, vec2);
|
||||
ILVL_B2_UB(src1, src0, src2, src1, vec1, vec3);
|
||||
DOTP_UB2_UH(vec0, vec1, filt0, filt0, tmp0, tmp1);
|
||||
SRARI_H2_UH(tmp0, tmp1, VP8_FILTER_SHIFT);
|
||||
PCKEV_ST_SB(tmp0, tmp1, dst);
|
||||
dst += dst_stride;
|
||||
|
||||
ILVR_B2_UB(src3, src2, src4, src3, vec4, vec6);
|
||||
ILVL_B2_UB(src3, src2, src4, src3, vec5, vec7);
|
||||
DOTP_UB2_UH(vec2, vec3, filt0, filt0, tmp2, tmp3);
|
||||
SRARI_H2_UH(tmp2, tmp3, VP8_FILTER_SHIFT);
|
||||
PCKEV_ST_SB(tmp2, tmp3, dst);
|
||||
dst += dst_stride;
|
||||
|
||||
DOTP_UB2_UH(vec4, vec5, filt0, filt0, tmp0, tmp1);
|
||||
SRARI_H2_UH(tmp0, tmp1, VP8_FILTER_SHIFT);
|
||||
PCKEV_ST_SB(tmp0, tmp1, dst);
|
||||
dst += dst_stride;
|
||||
|
||||
DOTP_UB2_UH(vec6, vec7, filt0, filt0, tmp2, tmp3);
|
||||
SRARI_H2_UH(tmp2, tmp3, VP8_FILTER_SHIFT);
|
||||
PCKEV_ST_SB(tmp2, tmp3, dst);
|
||||
dst += dst_stride;
|
||||
|
||||
src0 = src4;
|
||||
}
|
||||
}
|
||||
|
||||
static void common_hv_2ht_2vt_4x4_msa(uint8_t *RESTRICT src, int32_t src_stride,
|
||||
uint8_t *RESTRICT dst, int32_t dst_stride,
|
||||
const int8_t *filter_horiz,
|
||||
const int8_t *filter_vert)
|
||||
{
|
||||
v16i8 src0, src1, src2, src3, src4, mask;
|
||||
v16u8 filt_vt, filt_hz, vec0, vec1, res0, res1;
|
||||
v8u16 hz_out0, hz_out1, hz_out2, hz_out3, hz_out4, filt, tmp0, tmp1;
|
||||
|
||||
mask = LD_SB(&vp8_mc_filt_mask_arr[16]);
|
||||
|
||||
filt = LD_UH(filter_horiz);
|
||||
filt_hz = (v16u8)__msa_splati_h((v8i16)filt, 0);
|
||||
filt = LD_UH(filter_vert);
|
||||
filt_vt = (v16u8)__msa_splati_h((v8i16)filt, 0);
|
||||
|
||||
LD_SB5(src, src_stride, src0, src1, src2, src3, src4);
|
||||
hz_out0 = HORIZ_2TAP_FILT_UH(src0, src1, mask, filt_hz, VP8_FILTER_SHIFT);
|
||||
hz_out2 = HORIZ_2TAP_FILT_UH(src2, src3, mask, filt_hz, VP8_FILTER_SHIFT);
|
||||
hz_out4 = HORIZ_2TAP_FILT_UH(src4, src4, mask, filt_hz, VP8_FILTER_SHIFT);
|
||||
hz_out1 = (v8u16)__msa_sldi_b((v16i8)hz_out2, (v16i8)hz_out0, 8);
|
||||
hz_out3 = (v8u16)__msa_pckod_d((v2i64)hz_out4, (v2i64)hz_out2);
|
||||
|
||||
ILVEV_B2_UB(hz_out0, hz_out1, hz_out2, hz_out3, vec0, vec1);
|
||||
DOTP_UB2_UH(vec0, vec1, filt_vt, filt_vt, tmp0, tmp1);
|
||||
SRARI_H2_UH(tmp0, tmp1, VP8_FILTER_SHIFT);
|
||||
PCKEV_B2_UB(tmp0, tmp0, tmp1, tmp1, res0, res1);
|
||||
ST4x4_UB(res0, res1, 0, 1, 0, 1, dst, dst_stride);
|
||||
}
|
||||
|
||||
static void common_hv_2ht_2vt_4x8_msa(uint8_t *RESTRICT src, int32_t src_stride,
|
||||
uint8_t *RESTRICT dst, int32_t dst_stride,
|
||||
const int8_t *filter_horiz,
|
||||
const int8_t *filter_vert)
|
||||
{
|
||||
v16i8 src0, src1, src2, src3, src4, src5, src6, src7, src8, mask;
|
||||
v16i8 res0, res1, res2, res3;
|
||||
v16u8 filt_hz, filt_vt, vec0, vec1, vec2, vec3;
|
||||
v8u16 hz_out0, hz_out1, hz_out2, hz_out3, hz_out4, hz_out5, hz_out6;
|
||||
v8u16 hz_out7, hz_out8, vec4, vec5, vec6, vec7, filt;
|
||||
|
||||
mask = LD_SB(&vp8_mc_filt_mask_arr[16]);
|
||||
|
||||
filt = LD_UH(filter_horiz);
|
||||
filt_hz = (v16u8)__msa_splati_h((v8i16)filt, 0);
|
||||
filt = LD_UH(filter_vert);
|
||||
filt_vt = (v16u8)__msa_splati_h((v8i16)filt, 0);
|
||||
|
||||
LD_SB8(src, src_stride, src0, src1, src2, src3, src4, src5, src6, src7);
|
||||
src += (8 * src_stride);
|
||||
src8 = LD_SB(src);
|
||||
|
||||
hz_out0 = HORIZ_2TAP_FILT_UH(src0, src1, mask, filt_hz, VP8_FILTER_SHIFT);
|
||||
hz_out2 = HORIZ_2TAP_FILT_UH(src2, src3, mask, filt_hz, VP8_FILTER_SHIFT);
|
||||
hz_out4 = HORIZ_2TAP_FILT_UH(src4, src5, mask, filt_hz, VP8_FILTER_SHIFT);
|
||||
hz_out6 = HORIZ_2TAP_FILT_UH(src6, src7, mask, filt_hz, VP8_FILTER_SHIFT);
|
||||
hz_out8 = HORIZ_2TAP_FILT_UH(src8, src8, mask, filt_hz, VP8_FILTER_SHIFT);
|
||||
SLDI_B3_UH(hz_out2, hz_out4, hz_out6, hz_out0, hz_out2, hz_out4, hz_out1,
|
||||
hz_out3, hz_out5, 8);
|
||||
hz_out7 = (v8u16)__msa_pckod_d((v2i64)hz_out8, (v2i64)hz_out6);
|
||||
|
||||
ILVEV_B2_UB(hz_out0, hz_out1, hz_out2, hz_out3, vec0, vec1);
|
||||
ILVEV_B2_UB(hz_out4, hz_out5, hz_out6, hz_out7, vec2, vec3);
|
||||
DOTP_UB4_UH(vec0, vec1, vec2, vec3, filt_vt, filt_vt, filt_vt, filt_vt,
|
||||
vec4, vec5, vec6, vec7);
|
||||
SRARI_H4_UH(vec4, vec5, vec6, vec7, VP8_FILTER_SHIFT);
|
||||
PCKEV_B4_SB(vec4, vec4, vec5, vec5, vec6, vec6, vec7, vec7,
|
||||
res0, res1, res2, res3);
|
||||
ST4x4_UB(res0, res1, 0, 1, 0, 1, dst, dst_stride);
|
||||
dst += (4 * dst_stride);
|
||||
ST4x4_UB(res2, res3, 0, 1, 0, 1, dst, dst_stride);
|
||||
}
|
||||
|
||||
static void common_hv_2ht_2vt_4w_msa(uint8_t *RESTRICT src, int32_t src_stride,
|
||||
uint8_t *RESTRICT dst, int32_t dst_stride,
|
||||
const int8_t *filter_horiz,
|
||||
const int8_t *filter_vert,
|
||||
int32_t height)
|
||||
{
|
||||
if (4 == height)
|
||||
{
|
||||
common_hv_2ht_2vt_4x4_msa(src, src_stride, dst, dst_stride,
|
||||
filter_horiz, filter_vert);
|
||||
}
|
||||
else if (8 == height)
|
||||
{
|
||||
common_hv_2ht_2vt_4x8_msa(src, src_stride, dst, dst_stride,
|
||||
filter_horiz, filter_vert);
|
||||
}
|
||||
}
|
||||
|
||||
static void common_hv_2ht_2vt_8x4_msa(uint8_t *RESTRICT src, int32_t src_stride,
|
||||
uint8_t *RESTRICT dst, int32_t dst_stride,
|
||||
const int8_t *filter_horiz,
|
||||
const int8_t *filter_vert)
|
||||
{
|
||||
v16i8 src0, src1, src2, src3, src4, mask, out0, out1;
|
||||
v16u8 filt_hz, filt_vt, vec0, vec1, vec2, vec3;
|
||||
v8u16 hz_out0, hz_out1, tmp0, tmp1, tmp2, tmp3;
|
||||
v8i16 filt;
|
||||
|
||||
mask = LD_SB(&vp8_mc_filt_mask_arr[0]);
|
||||
|
||||
filt = LD_SH(filter_horiz);
|
||||
filt_hz = (v16u8)__msa_splati_h(filt, 0);
|
||||
filt = LD_SH(filter_vert);
|
||||
filt_vt = (v16u8)__msa_splati_h(filt, 0);
|
||||
|
||||
LD_SB5(src, src_stride, src0, src1, src2, src3, src4);
|
||||
|
||||
hz_out0 = HORIZ_2TAP_FILT_UH(src0, src0, mask, filt_hz, VP8_FILTER_SHIFT);
|
||||
hz_out1 = HORIZ_2TAP_FILT_UH(src1, src1, mask, filt_hz, VP8_FILTER_SHIFT);
|
||||
vec0 = (v16u8)__msa_ilvev_b((v16i8)hz_out1, (v16i8)hz_out0);
|
||||
tmp0 = __msa_dotp_u_h(vec0, filt_vt);
|
||||
|
||||
hz_out0 = HORIZ_2TAP_FILT_UH(src2, src2, mask, filt_hz, VP8_FILTER_SHIFT);
|
||||
vec1 = (v16u8)__msa_ilvev_b((v16i8)hz_out0, (v16i8)hz_out1);
|
||||
tmp1 = __msa_dotp_u_h(vec1, filt_vt);
|
||||
|
||||
hz_out1 = HORIZ_2TAP_FILT_UH(src3, src3, mask, filt_hz, VP8_FILTER_SHIFT);
|
||||
vec2 = (v16u8)__msa_ilvev_b((v16i8)hz_out1, (v16i8)hz_out0);
|
||||
tmp2 = __msa_dotp_u_h(vec2, filt_vt);
|
||||
|
||||
hz_out0 = HORIZ_2TAP_FILT_UH(src4, src4, mask, filt_hz, VP8_FILTER_SHIFT);
|
||||
vec3 = (v16u8)__msa_ilvev_b((v16i8)hz_out0, (v16i8)hz_out1);
|
||||
tmp3 = __msa_dotp_u_h(vec3, filt_vt);
|
||||
|
||||
SRARI_H4_UH(tmp0, tmp1, tmp2, tmp3, VP8_FILTER_SHIFT);
|
||||
PCKEV_B2_SB(tmp1, tmp0, tmp3, tmp2, out0, out1);
|
||||
ST8x4_UB(out0, out1, dst, dst_stride);
|
||||
}
|
||||
|
||||
static void common_hv_2ht_2vt_8x8mult_msa(uint8_t *RESTRICT src,
|
||||
int32_t src_stride,
|
||||
uint8_t *RESTRICT dst,
|
||||
int32_t dst_stride,
|
||||
const int8_t *filter_horiz,
|
||||
const int8_t *filter_vert,
|
||||
int32_t height)
|
||||
{
|
||||
uint32_t loop_cnt;
|
||||
v16i8 src0, src1, src2, src3, src4, mask, out0, out1;
|
||||
v16u8 filt_hz, filt_vt, vec0;
|
||||
v8u16 hz_out0, hz_out1, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7, tmp8;
|
||||
v8i16 filt;
|
||||
|
||||
mask = LD_SB(&vp8_mc_filt_mask_arr[0]);
|
||||
|
||||
filt = LD_SH(filter_horiz);
|
||||
filt_hz = (v16u8)__msa_splati_h(filt, 0);
|
||||
filt = LD_SH(filter_vert);
|
||||
filt_vt = (v16u8)__msa_splati_h(filt, 0);
|
||||
|
||||
src0 = LD_SB(src);
|
||||
src += src_stride;
|
||||
|
||||
hz_out0 = HORIZ_2TAP_FILT_UH(src0, src0, mask, filt_hz, VP8_FILTER_SHIFT);
|
||||
|
||||
for (loop_cnt = (height >> 3); loop_cnt--;)
|
||||
{
|
||||
LD_SB4(src, src_stride, src1, src2, src3, src4);
|
||||
src += (4 * src_stride);
|
||||
|
||||
hz_out1 = HORIZ_2TAP_FILT_UH(src1, src1, mask, filt_hz,
|
||||
VP8_FILTER_SHIFT);
|
||||
vec0 = (v16u8)__msa_ilvev_b((v16i8)hz_out1, (v16i8)hz_out0);
|
||||
tmp1 = __msa_dotp_u_h(vec0, filt_vt);
|
||||
|
||||
hz_out0 = HORIZ_2TAP_FILT_UH(src2, src2, mask, filt_hz,
|
||||
VP8_FILTER_SHIFT);
|
||||
vec0 = (v16u8)__msa_ilvev_b((v16i8)hz_out0, (v16i8)hz_out1);
|
||||
tmp2 = __msa_dotp_u_h(vec0, filt_vt);
|
||||
|
||||
SRARI_H2_UH(tmp1, tmp2, VP8_FILTER_SHIFT);
|
||||
|
||||
hz_out1 = HORIZ_2TAP_FILT_UH(src3, src3, mask, filt_hz,
|
||||
VP8_FILTER_SHIFT);
|
||||
vec0 = (v16u8)__msa_ilvev_b((v16i8)hz_out1, (v16i8)hz_out0);
|
||||
tmp3 = __msa_dotp_u_h(vec0, filt_vt);
|
||||
|
||||
hz_out0 = HORIZ_2TAP_FILT_UH(src4, src4, mask, filt_hz,
|
||||
VP8_FILTER_SHIFT);
|
||||
LD_SB4(src, src_stride, src1, src2, src3, src4);
|
||||
src += (4 * src_stride);
|
||||
vec0 = (v16u8)__msa_ilvev_b((v16i8)hz_out0, (v16i8)hz_out1);
|
||||
tmp4 = __msa_dotp_u_h(vec0, filt_vt);
|
||||
|
||||
SRARI_H2_UH(tmp3, tmp4, VP8_FILTER_SHIFT);
|
||||
PCKEV_B2_SB(tmp2, tmp1, tmp4, tmp3, out0, out1);
|
||||
ST8x4_UB(out0, out1, dst, dst_stride);
|
||||
dst += (4 * dst_stride);
|
||||
|
||||
hz_out1 = HORIZ_2TAP_FILT_UH(src1, src1, mask, filt_hz,
|
||||
VP8_FILTER_SHIFT);
|
||||
vec0 = (v16u8)__msa_ilvev_b((v16i8)hz_out1, (v16i8)hz_out0);
|
||||
tmp5 = __msa_dotp_u_h(vec0, filt_vt);
|
||||
|
||||
hz_out0 = HORIZ_2TAP_FILT_UH(src2, src2, mask, filt_hz,
|
||||
VP8_FILTER_SHIFT);
|
||||
vec0 = (v16u8)__msa_ilvev_b((v16i8)hz_out0, (v16i8)hz_out1);
|
||||
tmp6 = __msa_dotp_u_h(vec0, filt_vt);
|
||||
|
||||
hz_out1 = HORIZ_2TAP_FILT_UH(src3, src3, mask, filt_hz,
|
||||
VP8_FILTER_SHIFT);
|
||||
vec0 = (v16u8)__msa_ilvev_b((v16i8)hz_out1, (v16i8)hz_out0);
|
||||
tmp7 = __msa_dotp_u_h(vec0, filt_vt);
|
||||
|
||||
hz_out0 = HORIZ_2TAP_FILT_UH(src4, src4, mask, filt_hz,
|
||||
VP8_FILTER_SHIFT);
|
||||
vec0 = (v16u8)__msa_ilvev_b((v16i8)hz_out0, (v16i8)hz_out1);
|
||||
tmp8 = __msa_dotp_u_h(vec0, filt_vt);
|
||||
|
||||
SRARI_H4_UH(tmp5, tmp6, tmp7, tmp8, VP8_FILTER_SHIFT);
|
||||
PCKEV_B2_SB(tmp6, tmp5, tmp8, tmp7, out0, out1);
|
||||
ST8x4_UB(out0, out1, dst, dst_stride);
|
||||
dst += (4 * dst_stride);
|
||||
}
|
||||
}
|
||||
|
||||
static void common_hv_2ht_2vt_8w_msa(uint8_t *RESTRICT src, int32_t src_stride,
|
||||
uint8_t *RESTRICT dst, int32_t dst_stride,
|
||||
const int8_t *filter_horiz,
|
||||
const int8_t *filter_vert,
|
||||
int32_t height)
|
||||
{
|
||||
if (4 == height)
|
||||
{
|
||||
common_hv_2ht_2vt_8x4_msa(src, src_stride, dst, dst_stride,
|
||||
filter_horiz, filter_vert);
|
||||
}
|
||||
else
|
||||
{
|
||||
common_hv_2ht_2vt_8x8mult_msa(src, src_stride, dst, dst_stride,
|
||||
filter_horiz, filter_vert, height);
|
||||
}
|
||||
}
|
||||
|
||||
static void common_hv_2ht_2vt_16w_msa(uint8_t *RESTRICT src, int32_t src_stride,
|
||||
uint8_t *RESTRICT dst, int32_t dst_stride,
|
||||
const int8_t *filter_horiz,
|
||||
const int8_t *filter_vert,
|
||||
int32_t height)
|
||||
{
|
||||
uint32_t loop_cnt;
|
||||
v16i8 src0, src1, src2, src3, src4, src5, src6, src7, mask;
|
||||
v16u8 filt_hz, filt_vt, vec0, vec1;
|
||||
v8u16 tmp1, tmp2, hz_out0, hz_out1, hz_out2, hz_out3;
|
||||
v8i16 filt;
|
||||
|
||||
mask = LD_SB(&vp8_mc_filt_mask_arr[0]);
|
||||
|
||||
/* rearranging filter */
|
||||
filt = LD_SH(filter_horiz);
|
||||
filt_hz = (v16u8)__msa_splati_h(filt, 0);
|
||||
filt = LD_SH(filter_vert);
|
||||
filt_vt = (v16u8)__msa_splati_h(filt, 0);
|
||||
|
||||
LD_SB2(src, 8, src0, src1);
|
||||
src += src_stride;
|
||||
|
||||
hz_out0 = HORIZ_2TAP_FILT_UH(src0, src0, mask, filt_hz, VP8_FILTER_SHIFT);
|
||||
hz_out2 = HORIZ_2TAP_FILT_UH(src1, src1, mask, filt_hz, VP8_FILTER_SHIFT);
|
||||
|
||||
for (loop_cnt = (height >> 2); loop_cnt--;)
|
||||
{
|
||||
LD_SB4(src, src_stride, src0, src2, src4, src6);
|
||||
LD_SB4(src + 8, src_stride, src1, src3, src5, src7);
|
||||
src += (4 * src_stride);
|
||||
|
||||
hz_out1 = HORIZ_2TAP_FILT_UH(src0, src0, mask, filt_hz,
|
||||
VP8_FILTER_SHIFT);
|
||||
hz_out3 = HORIZ_2TAP_FILT_UH(src1, src1, mask, filt_hz,
|
||||
VP8_FILTER_SHIFT);
|
||||
ILVEV_B2_UB(hz_out0, hz_out1, hz_out2, hz_out3, vec0, vec1);
|
||||
DOTP_UB2_UH(vec0, vec1, filt_vt, filt_vt, tmp1, tmp2);
|
||||
SRARI_H2_UH(tmp1, tmp2, VP8_FILTER_SHIFT);
|
||||
PCKEV_ST_SB(tmp1, tmp2, dst);
|
||||
dst += dst_stride;
|
||||
|
||||
hz_out0 = HORIZ_2TAP_FILT_UH(src2, src2, mask, filt_hz,
|
||||
VP8_FILTER_SHIFT);
|
||||
hz_out2 = HORIZ_2TAP_FILT_UH(src3, src3, mask, filt_hz,
|
||||
VP8_FILTER_SHIFT);
|
||||
ILVEV_B2_UB(hz_out1, hz_out0, hz_out3, hz_out2, vec0, vec1);
|
||||
DOTP_UB2_UH(vec0, vec1, filt_vt, filt_vt, tmp1, tmp2);
|
||||
SRARI_H2_UH(tmp1, tmp2, VP8_FILTER_SHIFT);
|
||||
PCKEV_ST_SB(tmp1, tmp2, dst);
|
||||
dst += dst_stride;
|
||||
|
||||
hz_out1 = HORIZ_2TAP_FILT_UH(src4, src4, mask, filt_hz,
|
||||
VP8_FILTER_SHIFT);
|
||||
hz_out3 = HORIZ_2TAP_FILT_UH(src5, src5, mask, filt_hz,
|
||||
VP8_FILTER_SHIFT);
|
||||
ILVEV_B2_UB(hz_out0, hz_out1, hz_out2, hz_out3, vec0, vec1);
|
||||
DOTP_UB2_UH(vec0, vec1, filt_vt, filt_vt, tmp1, tmp2);
|
||||
SRARI_H2_UH(tmp1, tmp2, VP8_FILTER_SHIFT);
|
||||
PCKEV_ST_SB(tmp1, tmp2, dst);
|
||||
dst += dst_stride;
|
||||
|
||||
hz_out0 = HORIZ_2TAP_FILT_UH(src6, src6, mask, filt_hz,
|
||||
VP8_FILTER_SHIFT);
|
||||
hz_out2 = HORIZ_2TAP_FILT_UH(src7, src7, mask, filt_hz,
|
||||
VP8_FILTER_SHIFT);
|
||||
ILVEV_B2_UB(hz_out1, hz_out0, hz_out3, hz_out2, vec0, vec1);
|
||||
DOTP_UB2_UH(vec0, vec1, filt_vt, filt_vt, tmp1, tmp2);
|
||||
SRARI_H2_UH(tmp1, tmp2, VP8_FILTER_SHIFT);
|
||||
PCKEV_ST_SB(tmp1, tmp2, dst);
|
||||
dst += dst_stride;
|
||||
}
|
||||
}
|
||||
|
||||
void vp8_bilinear_predict4x4_msa(uint8_t *RESTRICT src, int32_t src_stride,
                                 int32_t xoffset, int32_t yoffset,
                                 uint8_t *RESTRICT dst, int32_t dst_stride)
{
    const int8_t *h_filter = vp8_bilinear_filters_msa[xoffset - 1];
    const int8_t *v_filter = vp8_bilinear_filters_msa[yoffset - 1];

    if (yoffset)
    {
        if (xoffset)
        {
            common_hv_2ht_2vt_4w_msa(src, src_stride, dst, dst_stride,
                                     h_filter, v_filter, 4);
        }
        else
        {
            common_vt_2t_4w_msa(src, src_stride, dst, dst_stride, v_filter, 4);
        }
    }
    else
    {
        if (xoffset)
        {
            common_hz_2t_4w_msa(src, src_stride, dst, dst_stride, h_filter, 4);
        }
        else
        {
            uint32_t tp0, tp1, tp2, tp3;

            LW4(src, src_stride, tp0, tp1, tp2, tp3);
            SW4(tp0, tp1, tp2, tp3, dst, dst_stride);
        }
    }
}

void vp8_bilinear_predict8x4_msa(uint8_t *RESTRICT src, int32_t src_stride,
                                 int32_t xoffset, int32_t yoffset,
                                 uint8_t *RESTRICT dst, int32_t dst_stride)
{
    const int8_t *h_filter = vp8_bilinear_filters_msa[xoffset - 1];
    const int8_t *v_filter = vp8_bilinear_filters_msa[yoffset - 1];

    if (yoffset)
    {
        if (xoffset)
        {
            common_hv_2ht_2vt_8w_msa(src, src_stride, dst, dst_stride,
                                     h_filter, v_filter, 4);
        }
        else
        {
            common_vt_2t_8w_msa(src, src_stride, dst, dst_stride, v_filter, 4);
        }
    }
    else
    {
        if (xoffset)
        {
            common_hz_2t_8w_msa(src, src_stride, dst, dst_stride, h_filter, 4);
        }
        else
        {
            vp8_copy_mem8x4(src, src_stride, dst, dst_stride);
        }
    }
}

void vp8_bilinear_predict8x8_msa(uint8_t *RESTRICT src, int32_t src_stride,
                                 int32_t xoffset, int32_t yoffset,
                                 uint8_t *RESTRICT dst, int32_t dst_stride)
{
    const int8_t *h_filter = vp8_bilinear_filters_msa[xoffset - 1];
    const int8_t *v_filter = vp8_bilinear_filters_msa[yoffset - 1];

    if (yoffset)
    {
        if (xoffset)
        {
            common_hv_2ht_2vt_8w_msa(src, src_stride, dst, dst_stride,
                                     h_filter, v_filter, 8);
        }
        else
        {
            common_vt_2t_8w_msa(src, src_stride, dst, dst_stride, v_filter, 8);
        }
    }
    else
    {
        if (xoffset)
        {
            common_hz_2t_8w_msa(src, src_stride, dst, dst_stride, h_filter, 8);
        }
        else
        {
            vp8_copy_mem8x8(src, src_stride, dst, dst_stride);
        }
    }
}

void vp8_bilinear_predict16x16_msa(uint8_t *RESTRICT src, int32_t src_stride,
                                   int32_t xoffset, int32_t yoffset,
                                   uint8_t *RESTRICT dst, int32_t dst_stride)
{
    const int8_t *h_filter = vp8_bilinear_filters_msa[xoffset - 1];
    const int8_t *v_filter = vp8_bilinear_filters_msa[yoffset - 1];

    if (yoffset)
    {
        if (xoffset)
        {
            common_hv_2ht_2vt_16w_msa(src, src_stride, dst, dst_stride,
                                      h_filter, v_filter, 16);
        }
        else
        {
            common_vt_2t_16w_msa(src, src_stride, dst, dst_stride, v_filter,
                                 16);
        }
    }
    else
    {
        if (xoffset)
        {
            common_hz_2t_16w_msa(src, src_stride, dst, dst_stride, h_filter,
                                 16);
        }
        else
        {
            vp8_copy_mem16x16(src, src_stride, dst, dst_stride);
        }
    }
}
@@ -1,70 +0,0 @@
/*
 * Copyright (c) 2015 The WebM project authors. All Rights Reserved.
 *
 * Use of this source code is governed by a BSD-style license
 * that can be found in the LICENSE file in the root of the source
 * tree. An additional intellectual property rights grant can be found
 * in the file PATENTS. All contributing project authors may
 * be found in the AUTHORS file in the root of the source tree.
 */

#include "./vp8_rtcd.h"
#include "vp8/common/mips/msa/vp8_macros_msa.h"

static void copy_8x4_msa(uint8_t *src, int32_t src_stride,
                         uint8_t *dst, int32_t dst_stride)
{
    uint64_t src0, src1, src2, src3;

    LD4(src, src_stride, src0, src1, src2, src3);
    SD4(src0, src1, src2, src3, dst, dst_stride);
}

static void copy_8x8_msa(uint8_t *src, int32_t src_stride,
                         uint8_t *dst, int32_t dst_stride)
{
    uint64_t src0, src1, src2, src3;

    LD4(src, src_stride, src0, src1, src2, src3);
    src += (4 * src_stride);
    SD4(src0, src1, src2, src3, dst, dst_stride);
    dst += (4 * dst_stride);

    LD4(src, src_stride, src0, src1, src2, src3);
    SD4(src0, src1, src2, src3, dst, dst_stride);
}

static void copy_16x16_msa(uint8_t *src, int32_t src_stride,
                           uint8_t *dst, int32_t dst_stride)
{
    v16u8 src0, src1, src2, src3, src4, src5, src6, src7;
    v16u8 src8, src9, src10, src11, src12, src13, src14, src15;

    LD_UB8(src, src_stride, src0, src1, src2, src3, src4, src5, src6, src7);
    src += (8 * src_stride);
    LD_UB8(src, src_stride, src8, src9, src10, src11, src12, src13, src14,
           src15);

    ST_UB8(src0, src1, src2, src3, src4, src5, src6, src7, dst, dst_stride);
    dst += (8 * dst_stride);
    ST_UB8(src8, src9, src10, src11, src12, src13, src14, src15, dst,
           dst_stride);
}

void vp8_copy_mem16x16_msa(uint8_t *src, int32_t src_stride,
                           uint8_t *dst, int32_t dst_stride)
{
    copy_16x16_msa(src, src_stride, dst, dst_stride);
}

void vp8_copy_mem8x8_msa(uint8_t *src, int32_t src_stride,
                         uint8_t *dst, int32_t dst_stride)
{
    copy_8x8_msa(src, src_stride, dst, dst_stride);
}

void vp8_copy_mem8x4_msa(uint8_t *src, int32_t src_stride,
                         uint8_t *dst, int32_t dst_stride)
{
    copy_8x4_msa(src, src_stride, dst, dst_stride);
}
@@ -1,457 +0,0 @@
/*
 * Copyright (c) 2015 The WebM project authors. All Rights Reserved.
 *
 * Use of this source code is governed by a BSD-style license
 * that can be found in the LICENSE file in the root of the source
 * tree. An additional intellectual property rights grant can be found
 * in the file PATENTS. All contributing project authors may
 * be found in the AUTHORS file in the root of the source tree.
 */

#include "./vp8_rtcd.h"
#include "vp8/common/blockd.h"
#include "vp8/common/mips/msa/vp8_macros_msa.h"

static const int32_t cospi8sqrt2minus1 = 20091;
static const int32_t sinpi8sqrt2 = 35468;

#define TRANSPOSE_TWO_4x4_H(in0, in1, in2, in3, out0, out1, out2, out3) \
|
||||
{ \
|
||||
v8i16 s4_m, s5_m, s6_m, s7_m; \
|
||||
\
|
||||
TRANSPOSE8X4_SH_SH(in0, in1, in2, in3, s4_m, s5_m, s6_m, s7_m); \
|
||||
ILVR_D2_SH(s6_m, s4_m, s7_m, s5_m, out0, out2); \
|
||||
out1 = (v8i16)__msa_ilvl_d((v2i64)s6_m, (v2i64)s4_m); \
|
||||
out3 = (v8i16)__msa_ilvl_d((v2i64)s7_m, (v2i64)s5_m); \
|
||||
}
|
||||
|
||||
#define EXPAND_TO_H_MULTIPLY_SINPI8SQRT2_PCK_TO_W(in) \
|
||||
({ \
|
||||
v8i16 out_m; \
|
||||
v8i16 zero_m = { 0 }; \
|
||||
v4i32 tmp1_m, tmp2_m; \
|
||||
v4i32 sinpi8_sqrt2_m = __msa_fill_w(sinpi8sqrt2); \
|
||||
\
|
||||
ILVRL_H2_SW(in, zero_m, tmp1_m, tmp2_m); \
|
||||
tmp1_m >>= 16; \
|
||||
tmp2_m >>= 16; \
|
||||
tmp1_m = (tmp1_m * sinpi8_sqrt2_m) >> 16; \
|
||||
tmp2_m = (tmp2_m * sinpi8_sqrt2_m) >> 16; \
|
||||
out_m = __msa_pckev_h((v8i16)tmp2_m, (v8i16)tmp1_m); \
|
||||
\
|
||||
out_m; \
|
||||
})
|
||||
|
||||
#define VP8_IDCT_1D_H(in0, in1, in2, in3, out0, out1, out2, out3) \
|
||||
{ \
|
||||
v8i16 a1_m, b1_m, c1_m, d1_m; \
|
||||
v8i16 c_tmp1_m, c_tmp2_m, d_tmp1_m, d_tmp2_m; \
|
||||
v8i16 const_cospi8sqrt2minus1_m; \
|
||||
\
|
||||
const_cospi8sqrt2minus1_m = __msa_fill_h(cospi8sqrt2minus1); \
|
||||
a1_m = in0 + in2; \
|
||||
b1_m = in0 - in2; \
|
||||
c_tmp1_m = EXPAND_TO_H_MULTIPLY_SINPI8SQRT2_PCK_TO_W(in1); \
|
||||
c_tmp2_m = __msa_mul_q_h(in3, const_cospi8sqrt2minus1_m); \
|
||||
c_tmp2_m = c_tmp2_m >> 1; \
|
||||
c_tmp2_m = in3 + c_tmp2_m; \
|
||||
c1_m = c_tmp1_m - c_tmp2_m; \
|
||||
d_tmp1_m = __msa_mul_q_h(in1, const_cospi8sqrt2minus1_m); \
|
||||
d_tmp1_m = d_tmp1_m >> 1; \
|
||||
d_tmp1_m = in1 + d_tmp1_m; \
|
||||
d_tmp2_m = EXPAND_TO_H_MULTIPLY_SINPI8SQRT2_PCK_TO_W(in3); \
|
||||
d1_m = d_tmp1_m + d_tmp2_m; \
|
||||
BUTTERFLY_4(a1_m, b1_m, c1_m, d1_m, out0, out1, out2, out3); \
|
||||
}
|
||||
|
||||
#define VP8_IDCT_1D_W(in0, in1, in2, in3, out0, out1, out2, out3) \
|
||||
{ \
|
||||
v4i32 a1_m, b1_m, c1_m, d1_m; \
|
||||
v4i32 c_tmp1_m, c_tmp2_m, d_tmp1_m, d_tmp2_m; \
|
||||
v4i32 const_cospi8sqrt2minus1_m, sinpi8_sqrt2_m; \
|
||||
\
|
||||
const_cospi8sqrt2minus1_m = __msa_fill_w(cospi8sqrt2minus1); \
|
||||
sinpi8_sqrt2_m = __msa_fill_w(sinpi8sqrt2); \
|
||||
a1_m = in0 + in2; \
|
||||
b1_m = in0 - in2; \
|
||||
c_tmp1_m = (in1 * sinpi8_sqrt2_m) >> 16; \
|
||||
c_tmp2_m = in3 + ((in3 * const_cospi8sqrt2minus1_m) >> 16); \
|
||||
c1_m = c_tmp1_m - c_tmp2_m; \
|
||||
d_tmp1_m = in1 + ((in1 * const_cospi8sqrt2minus1_m) >> 16); \
|
||||
d_tmp2_m = (in3 * sinpi8_sqrt2_m) >> 16; \
|
||||
d1_m = d_tmp1_m + d_tmp2_m; \
|
||||
BUTTERFLY_4(a1_m, b1_m, c1_m, d1_m, out0, out1, out2, out3); \
|
||||
}
|
||||
|
||||
static void idct4x4_addblk_msa(int16_t *input, uint8_t *pred,
|
||||
int32_t pred_stride,
|
||||
uint8_t *dest, int32_t dest_stride)
|
||||
{
|
||||
v8i16 input0, input1;
|
||||
v4i32 in0, in1, in2, in3, hz0, hz1, hz2, hz3, vt0, vt1, vt2, vt3;
|
||||
v4i32 res0, res1, res2, res3;
|
||||
v16i8 zero = { 0 };
|
||||
v16i8 pred0, pred1, pred2, pred3, dest0, dest1, dest2, dest3;
|
||||
v16i8 mask = { 0, 4, 8, 12, 20, 21, 22, 23, 24,
|
||||
25, 26, 27, 28, 29, 30, 31 };
|
||||
|
||||
LD_SH2(input, 8, input0, input1);
|
||||
UNPCK_SH_SW(input0, in0, in1);
|
||||
UNPCK_SH_SW(input1, in2, in3);
|
||||
VP8_IDCT_1D_W(in0, in1, in2, in3, hz0, hz1, hz2, hz3);
|
||||
TRANSPOSE4x4_SW_SW(hz0, hz1, hz2, hz3, hz0, hz1, hz2, hz3);
|
||||
VP8_IDCT_1D_W(hz0, hz1, hz2, hz3, vt0, vt1, vt2, vt3);
|
||||
SRARI_W4_SW(vt0, vt1, vt2, vt3, 3);
|
||||
TRANSPOSE4x4_SW_SW(vt0, vt1, vt2, vt3, vt0, vt1, vt2, vt3);
|
||||
LD_SB4(pred, pred_stride, pred0, pred1, pred2, pred3);
|
||||
ILVR_B4_SW(zero, pred0, zero, pred1, zero, pred2, zero, pred3, res0, res1,
|
||||
res2, res3);
|
||||
ILVR_H4_SW(zero, res0, zero, res1, zero, res2, zero, res3, res0, res1,
|
||||
res2, res3);
|
||||
ADD4(res0, vt0, res1, vt1, res2, vt2, res3, vt3, res0, res1, res2, res3);
|
||||
res0 = CLIP_SW_0_255(res0);
|
||||
res1 = CLIP_SW_0_255(res1);
|
||||
res2 = CLIP_SW_0_255(res2);
|
||||
res3 = CLIP_SW_0_255(res3);
|
||||
LD_SB4(dest, dest_stride, dest0, dest1, dest2, dest3);
|
||||
VSHF_B2_SB(res0, dest0, res1, dest1, mask, mask, dest0, dest1);
|
||||
VSHF_B2_SB(res2, dest2, res3, dest3, mask, mask, dest2, dest3);
|
||||
ST_SB4(dest0, dest1, dest2, dest3, dest, dest_stride);
|
||||
}
|
||||
|
||||
static void idct4x4_addconst_msa(int16_t in_dc, uint8_t *pred,
|
||||
int32_t pred_stride,
|
||||
uint8_t *dest, int32_t dest_stride)
|
||||
{
|
||||
v8i16 vec;
|
||||
v8i16 res0, res1, res2, res3;
|
||||
v16i8 zero = { 0 };
|
||||
v16i8 pred0, pred1, pred2, pred3, dest0, dest1, dest2, dest3;
|
||||
v16i8 mask = { 0, 2, 4, 6, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31 };
|
||||
|
||||
vec = __msa_fill_h(in_dc);
|
||||
vec = __msa_srari_h(vec, 3);
|
||||
LD_SB4(pred, pred_stride, pred0, pred1, pred2, pred3);
|
||||
ILVR_B4_SH(zero, pred0, zero, pred1, zero, pred2, zero, pred3, res0, res1,
|
||||
res2, res3);
|
||||
ADD4(res0, vec, res1, vec, res2, vec, res3, vec, res0, res1, res2, res3);
|
||||
CLIP_SH4_0_255(res0, res1, res2, res3);
|
||||
LD_SB4(dest, dest_stride, dest0, dest1, dest2, dest3);
|
||||
VSHF_B2_SB(res0, dest0, res1, dest1, mask, mask, dest0, dest1);
|
||||
VSHF_B2_SB(res2, dest2, res3, dest3, mask, mask, dest2, dest3);
|
||||
ST_SB4(dest0, dest1, dest2, dest3, dest, dest_stride);
|
||||
}
|
||||
|
||||
void vp8_short_inv_walsh4x4_msa(int16_t *input, int16_t *mb_dq_coeff)
|
||||
{
|
||||
v8i16 input0, input1;
|
||||
v4i32 in0, in1, in2, in3, a1, b1, c1, d1;
|
||||
v4i32 hz0, hz1, hz2, hz3, vt0, vt1, vt2, vt3;
|
||||
|
||||
LD_SH2(input, 8, input0, input1);
|
||||
UNPCK_SH_SW(input0, in0, in1);
|
||||
UNPCK_SH_SW(input1, in2, in3);
|
||||
BUTTERFLY_4(in0, in1, in2, in3, a1, b1, c1, d1);
|
||||
BUTTERFLY_4(a1, d1, c1, b1, hz0, hz1, hz3, hz2);
|
||||
TRANSPOSE4x4_SW_SW(hz0, hz1, hz2, hz3, hz0, hz1, hz2, hz3);
|
||||
BUTTERFLY_4(hz0, hz1, hz2, hz3, a1, b1, c1, d1);
|
||||
BUTTERFLY_4(a1, d1, c1, b1, vt0, vt1, vt3, vt2);
|
||||
ADD4(vt0, 3, vt1, 3, vt2, 3, vt3, 3, vt0, vt1, vt2, vt3);
|
||||
SRA_4V(vt0, vt1, vt2, vt3, 3);
|
||||
mb_dq_coeff[0] = __msa_copy_s_h((v8i16)vt0, 0);
|
||||
mb_dq_coeff[16] = __msa_copy_s_h((v8i16)vt1, 0);
|
||||
mb_dq_coeff[32] = __msa_copy_s_h((v8i16)vt2, 0);
|
||||
mb_dq_coeff[48] = __msa_copy_s_h((v8i16)vt3, 0);
|
||||
mb_dq_coeff[64] = __msa_copy_s_h((v8i16)vt0, 2);
|
||||
mb_dq_coeff[80] = __msa_copy_s_h((v8i16)vt1, 2);
|
||||
mb_dq_coeff[96] = __msa_copy_s_h((v8i16)vt2, 2);
|
||||
mb_dq_coeff[112] = __msa_copy_s_h((v8i16)vt3, 2);
|
||||
mb_dq_coeff[128] = __msa_copy_s_h((v8i16)vt0, 4);
|
||||
mb_dq_coeff[144] = __msa_copy_s_h((v8i16)vt1, 4);
|
||||
mb_dq_coeff[160] = __msa_copy_s_h((v8i16)vt2, 4);
|
||||
mb_dq_coeff[176] = __msa_copy_s_h((v8i16)vt3, 4);
|
||||
mb_dq_coeff[192] = __msa_copy_s_h((v8i16)vt0, 6);
|
||||
mb_dq_coeff[208] = __msa_copy_s_h((v8i16)vt1, 6);
|
||||
mb_dq_coeff[224] = __msa_copy_s_h((v8i16)vt2, 6);
|
||||
mb_dq_coeff[240] = __msa_copy_s_h((v8i16)vt3, 6);
|
||||
}
|
||||
|
||||
static void dequant_idct4x4_addblk_msa(int16_t *input, int16_t *dequant_input,
|
||||
uint8_t *dest, int32_t dest_stride)
|
||||
{
|
||||
v8i16 input0, input1, dequant_in0, dequant_in1, mul0, mul1;
|
||||
v8i16 in0, in1, in2, in3;
|
||||
v8i16 hz0_h, hz1_h, hz2_h, hz3_h;
|
||||
v16i8 dest0, dest1, dest2, dest3;
|
||||
v4i32 hz0_w, hz1_w, hz2_w, hz3_w;
|
||||
v4i32 vt0, vt1, vt2, vt3, res0, res1, res2, res3;
|
||||
v2i64 zero = { 0 };
|
||||
v16i8 mask = { 0, 4, 8, 12, 20, 21, 22, 23, 24,
|
||||
25, 26, 27, 28, 29, 30, 31 };
|
||||
|
||||
LD_SH2(input, 8, input0, input1);
|
||||
LD_SH2(dequant_input, 8, dequant_in0, dequant_in1);
|
||||
MUL2(input0, dequant_in0, input1, dequant_in1, mul0, mul1);
|
||||
PCKEV_D2_SH(zero, mul0, zero, mul1, in0, in2);
|
||||
PCKOD_D2_SH(zero, mul0, zero, mul1, in1, in3);
|
||||
VP8_IDCT_1D_H(in0, in1, in2, in3, hz0_h, hz1_h, hz2_h, hz3_h);
|
||||
PCKEV_D2_SH(hz1_h, hz0_h, hz3_h, hz2_h, mul0, mul1);
|
||||
UNPCK_SH_SW(mul0, hz0_w, hz1_w);
|
||||
UNPCK_SH_SW(mul1, hz2_w, hz3_w);
|
||||
TRANSPOSE4x4_SW_SW(hz0_w, hz1_w, hz2_w, hz3_w, hz0_w, hz1_w, hz2_w, hz3_w);
|
||||
VP8_IDCT_1D_W(hz0_w, hz1_w, hz2_w, hz3_w, vt0, vt1, vt2, vt3);
|
||||
SRARI_W4_SW(vt0, vt1, vt2, vt3, 3);
|
||||
TRANSPOSE4x4_SW_SW(vt0, vt1, vt2, vt3, vt0, vt1, vt2, vt3);
|
||||
LD_SB4(dest, dest_stride, dest0, dest1, dest2, dest3);
|
||||
ILVR_B4_SW(zero, dest0, zero, dest1, zero, dest2, zero, dest3, res0, res1,
|
||||
res2, res3);
|
||||
ILVR_H4_SW(zero, res0, zero, res1, zero, res2, zero, res3, res0, res1,
|
||||
res2, res3);
|
||||
ADD4(res0, vt0, res1, vt1, res2, vt2, res3, vt3, res0, res1, res2, res3);
|
||||
res0 = CLIP_SW_0_255(res0);
|
||||
res1 = CLIP_SW_0_255(res1);
|
||||
res2 = CLIP_SW_0_255(res2);
|
||||
res3 = CLIP_SW_0_255(res3);
|
||||
VSHF_B2_SB(res0, dest0, res1, dest1, mask, mask, dest0, dest1);
|
||||
VSHF_B2_SB(res2, dest2, res3, dest3, mask, mask, dest2, dest3);
|
||||
ST_SB4(dest0, dest1, dest2, dest3, dest, dest_stride);
|
||||
}
|
||||
|
||||
static void dequant_idct4x4_addblk_2x_msa(int16_t *input,
|
||||
int16_t *dequant_input,
|
||||
uint8_t *dest, int32_t dest_stride)
|
||||
{
|
||||
v16u8 dest0, dest1, dest2, dest3;
|
||||
v8i16 in0, in1, in2, in3;
|
||||
v8i16 mul0, mul1, mul2, mul3, dequant_in0, dequant_in1;
|
||||
v8i16 hz0, hz1, hz2, hz3, vt0, vt1, vt2, vt3;
|
||||
v8i16 res0, res1, res2, res3;
|
||||
v4i32 hz0l, hz1l, hz2l, hz3l, hz0r, hz1r, hz2r, hz3r;
|
||||
v4i32 vt0l, vt1l, vt2l, vt3l, vt0r, vt1r, vt2r, vt3r;
|
||||
v16i8 zero = { 0 };
|
||||
|
||||
LD_SH4(input, 8, in0, in1, in2, in3);
|
||||
LD_SH2(dequant_input, 8, dequant_in0, dequant_in1);
|
||||
MUL4(in0, dequant_in0, in1, dequant_in1, in2, dequant_in0, in3, dequant_in1,
|
||||
mul0, mul1, mul2, mul3);
|
||||
PCKEV_D2_SH(mul2, mul0, mul3, mul1, in0, in2);
|
||||
PCKOD_D2_SH(mul2, mul0, mul3, mul1, in1, in3);
|
||||
VP8_IDCT_1D_H(in0, in1, in2, in3, hz0, hz1, hz2, hz3);
|
||||
TRANSPOSE_TWO_4x4_H(hz0, hz1, hz2, hz3, hz0, hz1, hz2, hz3);
|
||||
UNPCK_SH_SW(hz0, hz0r, hz0l);
|
||||
UNPCK_SH_SW(hz1, hz1r, hz1l);
|
||||
UNPCK_SH_SW(hz2, hz2r, hz2l);
|
||||
UNPCK_SH_SW(hz3, hz3r, hz3l);
|
||||
VP8_IDCT_1D_W(hz0l, hz1l, hz2l, hz3l, vt0l, vt1l, vt2l, vt3l);
|
||||
SRARI_W4_SW(vt0l, vt1l, vt2l, vt3l, 3);
|
||||
VP8_IDCT_1D_W(hz0r, hz1r, hz2r, hz3r, vt0r, vt1r, vt2r, vt3r);
|
||||
SRARI_W4_SW(vt0r, vt1r, vt2r, vt3r, 3);
|
||||
PCKEV_H4_SH(vt0l, vt0r, vt1l, vt1r, vt2l, vt2r, vt3l, vt3r, vt0, vt1, vt2,
|
||||
vt3);
|
||||
TRANSPOSE_TWO_4x4_H(vt0, vt1, vt2, vt3, vt0, vt1, vt2, vt3);
|
||||
LD_UB4(dest, dest_stride, dest0, dest1, dest2, dest3);
|
||||
ILVR_B4_SH(zero, dest0, zero, dest1, zero, dest2, zero, dest3, res0, res1,
|
||||
res2, res3);
|
||||
ADD4(res0, vt0, res1, vt1, res2, vt2, res3, vt3, res0, res1, res2, res3);
|
||||
CLIP_SH4_0_255(res0, res1, res2, res3);
|
||||
PCKEV_B4_SH(res0, res0, res1, res1, res2, res2, res3, res3, res0, res1,
|
||||
res2, res3);
|
||||
PCKOD_D2_UB(dest0, res0, dest1, res1, dest0, dest1);
|
||||
PCKOD_D2_UB(dest2, res2, dest3, res3, dest2, dest3);
|
||||
ST_UB4(dest0, dest1, dest2, dest3, dest, dest_stride);
|
||||
|
||||
__asm__ __volatile__(
|
||||
"sw $zero, 0(%[input]) \n\t"
|
||||
"sw $zero, 4(%[input]) \n\t"
|
||||
"sw $zero, 8(%[input]) \n\t"
|
||||
"sw $zero, 12(%[input]) \n\t"
|
||||
"sw $zero, 16(%[input]) \n\t"
|
||||
"sw $zero, 20(%[input]) \n\t"
|
||||
"sw $zero, 24(%[input]) \n\t"
|
||||
"sw $zero, 28(%[input]) \n\t"
|
||||
"sw $zero, 32(%[input]) \n\t"
|
||||
"sw $zero, 36(%[input]) \n\t"
|
||||
"sw $zero, 40(%[input]) \n\t"
|
||||
"sw $zero, 44(%[input]) \n\t"
|
||||
"sw $zero, 48(%[input]) \n\t"
|
||||
"sw $zero, 52(%[input]) \n\t"
|
||||
"sw $zero, 56(%[input]) \n\t"
|
||||
"sw $zero, 60(%[input]) \n\t"::
|
||||
|
||||
[input] "r"(input)
|
||||
);
|
||||
}
|
||||
|
||||
static void dequant_idct_addconst_2x_msa(int16_t *input, int16_t *dequant_input,
|
||||
uint8_t *dest, int32_t dest_stride)
|
||||
{
|
||||
v8i16 input_dc0, input_dc1, vec;
|
||||
v16u8 dest0, dest1, dest2, dest3;
|
||||
v16i8 zero = { 0 };
|
||||
v8i16 res0, res1, res2, res3;
|
||||
|
||||
input_dc0 = __msa_fill_h(input[0] * dequant_input[0]);
|
||||
input_dc1 = __msa_fill_h(input[16] * dequant_input[0]);
|
||||
SRARI_H2_SH(input_dc0, input_dc1, 3);
|
||||
vec = (v8i16)__msa_pckev_d((v2i64)input_dc1, (v2i64)input_dc0);
|
||||
input[0] = 0;
|
||||
input[16] = 0;
|
||||
LD_UB4(dest, dest_stride, dest0, dest1, dest2, dest3);
|
||||
ILVR_B4_SH(zero, dest0, zero, dest1, zero, dest2, zero, dest3, res0,
|
||||
res1, res2, res3);
|
||||
ADD4(res0, vec, res1, vec, res2, vec, res3, vec, res0, res1, res2, res3);
|
||||
CLIP_SH4_0_255(res0, res1, res2, res3);
|
||||
PCKEV_B4_SH(res0, res0, res1, res1, res2, res2, res3, res3, res0, res1,
|
||||
res2, res3);
|
||||
PCKOD_D2_UB(dest0, res0, dest1, res1, dest0, dest1);
|
||||
PCKOD_D2_UB(dest2, res2, dest3, res3, dest2, dest3);
|
||||
ST_UB4(dest0, dest1, dest2, dest3, dest, dest_stride);
|
||||
}

void vp8_short_idct4x4llm_msa(int16_t *input, uint8_t *pred_ptr,
                              int32_t pred_stride, uint8_t *dst_ptr,
                              int32_t dst_stride)
{
    idct4x4_addblk_msa(input, pred_ptr, pred_stride, dst_ptr, dst_stride);
}

void vp8_dc_only_idct_add_msa(int16_t input_dc, uint8_t *pred_ptr,
                              int32_t pred_stride, uint8_t *dst_ptr,
                              int32_t dst_stride)
{
    idct4x4_addconst_msa(input_dc, pred_ptr, pred_stride, dst_ptr, dst_stride);
}

void vp8_dequantize_b_msa(BLOCKD *d, int16_t *DQC)
{
    v8i16 dqc0, dqc1, q0, q1, dq0, dq1;

    LD_SH2(DQC, 8, dqc0, dqc1);
    LD_SH2(d->qcoeff, 8, q0, q1);
    MUL2(dqc0, q0, dqc1, q1, dq0, dq1);
    ST_SH2(dq0, dq1, d->dqcoeff, 8);
}
|
||||
|
||||
void vp8_dequant_idct_add_msa(int16_t *input, int16_t *dq,
|
||||
uint8_t *dest, int32_t stride)
|
||||
{
|
||||
dequant_idct4x4_addblk_msa(input, dq, dest, stride);
|
||||
|
||||
__asm__ __volatile__ (
|
||||
"sw $zero, 0(%[input]) \n\t"
|
||||
"sw $zero, 4(%[input]) \n\t"
|
||||
"sw $zero, 8(%[input]) \n\t"
|
||||
"sw $zero, 12(%[input]) \n\t"
|
||||
"sw $zero, 16(%[input]) \n\t"
|
||||
"sw $zero, 20(%[input]) \n\t"
|
||||
"sw $zero, 24(%[input]) \n\t"
|
||||
"sw $zero, 28(%[input]) \n\t"
|
||||
|
||||
:
|
||||
: [input] "r" (input)
|
||||
);
|
||||
}
|
||||
|
||||
void vp8_dequant_idct_add_y_block_msa(int16_t *q, int16_t *dq,
|
||||
uint8_t *dst, int32_t stride,
|
||||
char *eobs)
|
||||
{
|
||||
int16_t *eobs_h = (int16_t *)eobs;
|
||||
uint8_t i;
|
||||
|
||||
for (i = 4; i--;)
|
||||
{
|
||||
if (eobs_h[0])
|
||||
{
|
||||
if (eobs_h[0] & 0xfefe)
|
||||
{
|
||||
dequant_idct4x4_addblk_2x_msa(q, dq, dst, stride);
|
||||
}
|
||||
else
|
||||
{
|
||||
dequant_idct_addconst_2x_msa(q, dq, dst, stride);
|
||||
}
|
||||
}
|
||||
|
||||
q += 32;
|
||||
|
||||
if (eobs_h[1])
|
||||
{
|
||||
if (eobs_h[1] & 0xfefe)
|
||||
{
|
||||
dequant_idct4x4_addblk_2x_msa(q, dq, dst + 8, stride);
|
||||
}
|
||||
else
|
||||
{
|
||||
dequant_idct_addconst_2x_msa(q, dq, dst + 8, stride);
|
||||
}
|
||||
}
|
||||
|
||||
q += 32;
|
||||
dst += (4 * stride);
|
||||
eobs_h += 2;
|
||||
}
|
||||
}
|
||||
|
||||
void vp8_dequant_idct_add_uv_block_msa(int16_t *q, int16_t *dq,
|
||||
uint8_t *dstu, uint8_t *dstv,
|
||||
int32_t stride, char *eobs)
|
||||
{
|
||||
int16_t *eobs_h = (int16_t *)eobs;
|
||||
|
||||
if (eobs_h[0])
|
||||
{
|
||||
if (eobs_h[0] & 0xfefe)
|
||||
{
|
||||
dequant_idct4x4_addblk_2x_msa(q, dq, dstu, stride);
|
||||
}
|
||||
else
|
||||
{
|
||||
dequant_idct_addconst_2x_msa(q, dq, dstu, stride);
|
||||
}
|
||||
}
|
||||
|
||||
q += 32;
|
||||
dstu += (stride * 4);
|
||||
|
||||
if (eobs_h[1])
|
||||
{
|
||||
if (eobs_h[1] & 0xfefe)
|
||||
{
|
||||
dequant_idct4x4_addblk_2x_msa(q, dq, dstu, stride);
|
||||
}
|
||||
else
|
||||
{
|
||||
dequant_idct_addconst_2x_msa(q, dq, dstu, stride);
|
||||
}
|
||||
}
|
||||
|
||||
q += 32;
|
||||
|
||||
if (eobs_h[2])
|
||||
{
|
||||
if (eobs_h[2] & 0xfefe)
|
||||
{
|
||||
dequant_idct4x4_addblk_2x_msa(q, dq, dstv, stride);
|
||||
}
|
||||
else
|
||||
{
|
||||
dequant_idct_addconst_2x_msa(q, dq, dstv, stride);
|
||||
}
|
||||
}
|
||||
|
||||
q += 32;
|
||||
dstv += (stride * 4);
|
||||
|
||||
if (eobs_h[3])
|
||||
{
|
||||
if (eobs_h[3] & 0xfefe)
|
||||
{
|
||||
dequant_idct4x4_addblk_2x_msa(q, dq, dstv, stride);
|
||||
}
|
||||
else
|
||||
{
|
||||
dequant_idct_addconst_2x_msa(q, dq, dstv, stride);
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -1,826 +0,0 @@
/*
 * Copyright (c) 2015 The WebM project authors. All Rights Reserved.
 *
 * Use of this source code is governed by a BSD-style license
 * that can be found in the LICENSE file in the root of the source
 * tree. An additional intellectual property rights grant can be found
 * in the file PATENTS. All contributing project authors may
 * be found in the AUTHORS file in the root of the source tree.
 */

#include "./vp8_rtcd.h"
#include "vp8/common/loopfilter.h"
#include "vp8/common/mips/msa/vp8_macros_msa.h"

#define VP8_SIMPLE_MASK(p1, p0, q0, q1, b_limit, mask) \
|
||||
{ \
|
||||
v16u8 p1_a_sub_q1, p0_a_sub_q0; \
|
||||
\
|
||||
p0_a_sub_q0 = __msa_asub_u_b(p0, q0); \
|
||||
p1_a_sub_q1 = __msa_asub_u_b(p1, q1); \
|
||||
p1_a_sub_q1 = (v16u8)__msa_srli_b((v16i8)p1_a_sub_q1, 1); \
|
||||
p0_a_sub_q0 = __msa_adds_u_b(p0_a_sub_q0, p0_a_sub_q0); \
|
||||
mask = __msa_adds_u_b(p0_a_sub_q0, p1_a_sub_q1); \
|
||||
mask = ((v16u8)mask <= b_limit); \
|
||||
}
|
||||
|
||||
#define VP8_LPF_FILTER4_4W(p1_in_out, p0_in_out, q0_in_out, q1_in_out, \
|
||||
mask_in, hev_in) \
|
||||
{ \
|
||||
v16i8 p1_m, p0_m, q0_m, q1_m, q0_sub_p0, filt_sign; \
|
||||
v16i8 filt, filt1, filt2, cnst4b, cnst3b; \
|
||||
v8i16 q0_sub_p0_r, q0_sub_p0_l, filt_l, filt_r, cnst3h; \
|
||||
\
|
||||
p1_m = (v16i8)__msa_xori_b(p1_in_out, 0x80); \
|
||||
p0_m = (v16i8)__msa_xori_b(p0_in_out, 0x80); \
|
||||
q0_m = (v16i8)__msa_xori_b(q0_in_out, 0x80); \
|
||||
q1_m = (v16i8)__msa_xori_b(q1_in_out, 0x80); \
|
||||
\
|
||||
filt = __msa_subs_s_b(p1_m, q1_m); \
|
||||
\
|
||||
filt = filt & (v16i8)hev_in; \
|
||||
\
|
||||
q0_sub_p0 = q0_m - p0_m; \
|
||||
filt_sign = __msa_clti_s_b(filt, 0); \
|
||||
\
|
||||
cnst3h = __msa_ldi_h(3); \
|
||||
q0_sub_p0_r = (v8i16)__msa_ilvr_b(q0_sub_p0, q0_sub_p0); \
|
||||
q0_sub_p0_r = __msa_dotp_s_h((v16i8)q0_sub_p0_r, (v16i8)cnst3h); \
|
||||
filt_r = (v8i16)__msa_ilvr_b(filt_sign, filt); \
|
||||
filt_r += q0_sub_p0_r; \
|
||||
filt_r = __msa_sat_s_h(filt_r, 7); \
|
||||
\
|
||||
q0_sub_p0_l = (v8i16)__msa_ilvl_b(q0_sub_p0, q0_sub_p0); \
|
||||
q0_sub_p0_l = __msa_dotp_s_h((v16i8)q0_sub_p0_l, (v16i8)cnst3h); \
|
||||
filt_l = (v8i16)__msa_ilvl_b(filt_sign, filt); \
|
||||
filt_l += q0_sub_p0_l; \
|
||||
filt_l = __msa_sat_s_h(filt_l, 7); \
|
||||
\
|
||||
filt = __msa_pckev_b((v16i8)filt_l, (v16i8)filt_r); \
|
||||
filt = filt & (v16i8)mask_in; \
|
||||
\
|
||||
cnst4b = __msa_ldi_b(4); \
|
||||
filt1 = __msa_adds_s_b(filt, cnst4b); \
|
||||
filt1 >>= 3; \
|
||||
\
|
||||
cnst3b = __msa_ldi_b(3); \
|
||||
filt2 = __msa_adds_s_b(filt, cnst3b); \
|
||||
filt2 >>= 3; \
|
||||
\
|
||||
q0_m = __msa_subs_s_b(q0_m, filt1); \
|
||||
q0_in_out = __msa_xori_b((v16u8)q0_m, 0x80); \
|
||||
p0_m = __msa_adds_s_b(p0_m, filt2); \
|
||||
p0_in_out = __msa_xori_b((v16u8)p0_m, 0x80); \
|
||||
\
|
||||
filt = __msa_srari_b(filt1, 1); \
|
||||
hev_in = __msa_xori_b((v16u8)hev_in, 0xff); \
|
||||
filt = filt & (v16i8)hev_in; \
|
||||
\
|
||||
q1_m = __msa_subs_s_b(q1_m, filt); \
|
||||
q1_in_out = __msa_xori_b((v16u8)q1_m, 0x80); \
|
||||
p1_m = __msa_adds_s_b(p1_m, filt); \
|
||||
p1_in_out = __msa_xori_b((v16u8)p1_m, 0x80); \
|
||||
}
|
||||
|
||||
#define VP8_SIMPLE_FILT(p1_in, p0_in, q0_in, q1_in, mask) \
|
||||
{ \
|
||||
v16i8 p1_m, p0_m, q0_m, q1_m, q0_sub_p0, q0_sub_p0_sign; \
|
||||
v16i8 filt, filt1, filt2, cnst4b, cnst3b, filt_sign; \
|
||||
v8i16 q0_sub_p0_r, q0_sub_p0_l, filt_l, filt_r, cnst3h; \
|
||||
\
|
||||
p1_m = (v16i8)__msa_xori_b(p1_in, 0x80); \
|
||||
p0_m = (v16i8)__msa_xori_b(p0_in, 0x80); \
|
||||
q0_m = (v16i8)__msa_xori_b(q0_in, 0x80); \
|
||||
q1_m = (v16i8)__msa_xori_b(q1_in, 0x80); \
|
||||
\
|
||||
filt = __msa_subs_s_b(p1_m, q1_m); \
|
||||
\
|
||||
q0_sub_p0 = q0_m - p0_m; \
|
||||
filt_sign = __msa_clti_s_b(filt, 0); \
|
||||
\
|
||||
cnst3h = __msa_ldi_h(3); \
|
||||
q0_sub_p0_sign = __msa_clti_s_b(q0_sub_p0, 0); \
|
||||
q0_sub_p0_r = (v8i16)__msa_ilvr_b(q0_sub_p0_sign, q0_sub_p0); \
|
||||
q0_sub_p0_r *= cnst3h; \
|
||||
filt_r = (v8i16)__msa_ilvr_b(filt_sign, filt); \
|
||||
filt_r += q0_sub_p0_r; \
|
||||
filt_r = __msa_sat_s_h(filt_r, 7); \
|
||||
\
|
||||
q0_sub_p0_l = (v8i16)__msa_ilvl_b(q0_sub_p0_sign, q0_sub_p0); \
|
||||
q0_sub_p0_l *= cnst3h; \
|
||||
filt_l = (v8i16)__msa_ilvl_b(filt_sign, filt); \
|
||||
filt_l += q0_sub_p0_l; \
|
||||
filt_l = __msa_sat_s_h(filt_l, 7); \
|
||||
\
|
||||
filt = __msa_pckev_b((v16i8)filt_l, (v16i8)filt_r); \
|
||||
filt = filt & (v16i8)(mask); \
|
||||
\
|
||||
cnst4b = __msa_ldi_b(4); \
|
||||
filt1 = __msa_adds_s_b(filt, cnst4b); \
|
||||
filt1 >>= 3; \
|
||||
\
|
||||
cnst3b = __msa_ldi_b(3); \
|
||||
filt2 = __msa_adds_s_b(filt, cnst3b); \
|
||||
filt2 >>= 3; \
|
||||
\
|
||||
q0_m = __msa_subs_s_b(q0_m, filt1); \
|
||||
p0_m = __msa_adds_s_b(p0_m, filt2); \
|
||||
q0_in = __msa_xori_b((v16u8)q0_m, 0x80); \
|
||||
p0_in = __msa_xori_b((v16u8)p0_m, 0x80); \
|
||||
}
|
||||
|
||||
#define VP8_MBFILTER(p2, p1, p0, q0, q1, q2, mask, hev) \
{ \
    v16i8 p2_m, p1_m, p0_m, q2_m, q1_m, q0_m; \
    v16i8 filt, q0_sub_p0, cnst4b, cnst3b; \
    v16i8 u, filt1, filt2, filt_sign, q0_sub_p0_sign; \
    v8i16 q0_sub_p0_r, q0_sub_p0_l, filt_r, u_r, u_l, filt_l; \
    v8i16 cnst3h, cnst27h, cnst18h, cnst63h; \
\
    cnst3h = __msa_ldi_h(3); \
\
    p2_m = (v16i8)__msa_xori_b(p2, 0x80); \
    p1_m = (v16i8)__msa_xori_b(p1, 0x80); \
    p0_m = (v16i8)__msa_xori_b(p0, 0x80); \
    q0_m = (v16i8)__msa_xori_b(q0, 0x80); \
    q1_m = (v16i8)__msa_xori_b(q1, 0x80); \
    q2_m = (v16i8)__msa_xori_b(q2, 0x80); \
\
    filt = __msa_subs_s_b(p1_m, q1_m); \
    q0_sub_p0 = q0_m - p0_m; \
    q0_sub_p0_sign = __msa_clti_s_b(q0_sub_p0, 0); \
    filt_sign = __msa_clti_s_b(filt, 0); \
\
    q0_sub_p0_r = (v8i16)__msa_ilvr_b(q0_sub_p0_sign, q0_sub_p0); \
    q0_sub_p0_r *= cnst3h; \
    filt_r = (v8i16)__msa_ilvr_b(filt_sign, filt); \
    filt_r = filt_r + q0_sub_p0_r; \
    filt_r = __msa_sat_s_h(filt_r, 7); \
\
    q0_sub_p0_l = (v8i16)__msa_ilvl_b(q0_sub_p0_sign, q0_sub_p0); \
    q0_sub_p0_l *= cnst3h; \
    filt_l = (v8i16)__msa_ilvl_b(filt_sign, filt); \
    filt_l = filt_l + q0_sub_p0_l; \
    filt_l = __msa_sat_s_h(filt_l, 7); \
\
    filt = __msa_pckev_b((v16i8)filt_l, (v16i8)filt_r); \
    filt = filt & (v16i8)mask; \
    filt2 = filt & (v16i8)hev; \
\
    hev = __msa_xori_b(hev, 0xff); \
    filt = filt & (v16i8)hev; \
    cnst4b = __msa_ldi_b(4); \
    filt1 = __msa_adds_s_b(filt2, cnst4b); \
    filt1 >>= 3; \
    cnst3b = __msa_ldi_b(3); \
    filt2 = __msa_adds_s_b(filt2, cnst3b); \
    filt2 >>= 3; \
    q0_m = __msa_subs_s_b(q0_m, filt1); \
    p0_m = __msa_adds_s_b(p0_m, filt2); \
\
    filt_sign = __msa_clti_s_b(filt, 0); \
    ILVRL_B2_SH(filt_sign, filt, filt_r, filt_l); \
\
    cnst27h = __msa_ldi_h(27); \
    cnst63h = __msa_ldi_h(63); \
\
    u_r = filt_r * cnst27h; \
    u_r += cnst63h; \
    u_r >>= 7; \
    u_r = __msa_sat_s_h(u_r, 7); \
    u_l = filt_l * cnst27h; \
    u_l += cnst63h; \
    u_l >>= 7; \
    u_l = __msa_sat_s_h(u_l, 7); \
    u = __msa_pckev_b((v16i8)u_l, (v16i8)u_r); \
    q0_m = __msa_subs_s_b(q0_m, u); \
    q0 = __msa_xori_b((v16u8)q0_m, 0x80); \
    p0_m = __msa_adds_s_b(p0_m, u); \
    p0 = __msa_xori_b((v16u8)p0_m, 0x80); \
    cnst18h = __msa_ldi_h(18); \
    u_r = filt_r * cnst18h; \
    u_r += cnst63h; \
    u_r >>= 7; \
    u_r = __msa_sat_s_h(u_r, 7); \
\
    u_l = filt_l * cnst18h; \
    u_l += cnst63h; \
    u_l >>= 7; \
    u_l = __msa_sat_s_h(u_l, 7); \
    u = __msa_pckev_b((v16i8)u_l, (v16i8)u_r); \
    q1_m = __msa_subs_s_b(q1_m, u); \
    q1 = __msa_xori_b((v16u8)q1_m, 0x80); \
    p1_m = __msa_adds_s_b(p1_m, u); \
    p1 = __msa_xori_b((v16u8)p1_m, 0x80); \
    u_r = filt_r << 3; \
    u_r += filt_r + cnst63h; \
    u_r >>= 7; \
    u_r = __msa_sat_s_h(u_r, 7); \
\
    u_l = filt_l << 3; \
    u_l += filt_l + cnst63h; \
    u_l >>= 7; \
    u_l = __msa_sat_s_h(u_l, 7); \
    u = __msa_pckev_b((v16i8)u_l, (v16i8)u_r); \
    q2_m = __msa_subs_s_b(q2_m, u); \
    q2 = __msa_xori_b((v16u8)q2_m, 0x80); \
    p2_m = __msa_adds_s_b(p2_m, u); \
    p2 = __msa_xori_b((v16u8)p2_m, 0x80); \
}

#define LPF_MASK_HEV(p3_in, p2_in, p1_in, p0_in, \
                     q0_in, q1_in, q2_in, q3_in, \
                     limit_in, b_limit_in, thresh_in, \
                     hev_out, mask_out, flat_out) \
{ \
    v16u8 p3_asub_p2_m, p2_asub_p1_m, p1_asub_p0_m, q1_asub_q0_m; \
    v16u8 p1_asub_q1_m, p0_asub_q0_m, q3_asub_q2_m, q2_asub_q1_m; \
\
    p3_asub_p2_m = __msa_asub_u_b((p3_in), (p2_in)); \
    p2_asub_p1_m = __msa_asub_u_b((p2_in), (p1_in)); \
    p1_asub_p0_m = __msa_asub_u_b((p1_in), (p0_in)); \
    q1_asub_q0_m = __msa_asub_u_b((q1_in), (q0_in)); \
    q2_asub_q1_m = __msa_asub_u_b((q2_in), (q1_in)); \
    q3_asub_q2_m = __msa_asub_u_b((q3_in), (q2_in)); \
    p0_asub_q0_m = __msa_asub_u_b((p0_in), (q0_in)); \
    p1_asub_q1_m = __msa_asub_u_b((p1_in), (q1_in)); \
    flat_out = __msa_max_u_b(p1_asub_p0_m, q1_asub_q0_m); \
    hev_out = (thresh_in) < (v16u8)flat_out; \
    p0_asub_q0_m = __msa_adds_u_b(p0_asub_q0_m, p0_asub_q0_m); \
    p1_asub_q1_m >>= 1; \
    p0_asub_q0_m = __msa_adds_u_b(p0_asub_q0_m, p1_asub_q1_m); \
    mask_out = (b_limit_in) < p0_asub_q0_m; \
    mask_out = __msa_max_u_b(flat_out, mask_out); \
    p3_asub_p2_m = __msa_max_u_b(p3_asub_p2_m, p2_asub_p1_m); \
    mask_out = __msa_max_u_b(p3_asub_p2_m, mask_out); \
    q2_asub_q1_m = __msa_max_u_b(q2_asub_q1_m, q3_asub_q2_m); \
    mask_out = __msa_max_u_b(q2_asub_q1_m, mask_out); \
    mask_out = (limit_in) < (v16u8)mask_out; \
    mask_out = __msa_xori_b(mask_out, 0xff); \
}

#define VP8_ST6x1_UB(in0, in0_idx, in1, in1_idx, pdst, stride) \
{ \
    uint16_t tmp0_h; \
    uint32_t tmp0_w; \
\
    tmp0_w = __msa_copy_u_w((v4i32)in0, in0_idx); \
    tmp0_h = __msa_copy_u_h((v8i16)in1, in1_idx); \
    SW(tmp0_w, pdst); \
    SH(tmp0_h, pdst + stride); \
}


static void loop_filter_horizontal_4_dual_msa(uint8_t *src, int32_t pitch,
                                              const uint8_t *b_limit0_ptr,
                                              const uint8_t *limit0_ptr,
                                              const uint8_t *thresh0_ptr,
                                              const uint8_t *b_limit1_ptr,
                                              const uint8_t *limit1_ptr,
                                              const uint8_t *thresh1_ptr)
{
    v16u8 mask, hev, flat;
    v16u8 thresh0, b_limit0, limit0, thresh1, b_limit1, limit1;
    v16u8 p3, p2, p1, p0, q3, q2, q1, q0;

    LD_UB8((src - 4 * pitch), pitch, p3, p2, p1, p0, q0, q1, q2, q3);
    thresh0 = (v16u8)__msa_fill_b(*thresh0_ptr);
    thresh1 = (v16u8)__msa_fill_b(*thresh1_ptr);
    thresh0 = (v16u8)__msa_ilvr_d((v2i64)thresh1, (v2i64)thresh0);

    b_limit0 = (v16u8)__msa_fill_b(*b_limit0_ptr);
    b_limit1 = (v16u8)__msa_fill_b(*b_limit1_ptr);
    b_limit0 = (v16u8)__msa_ilvr_d((v2i64)b_limit1, (v2i64)b_limit0);

    limit0 = (v16u8)__msa_fill_b(*limit0_ptr);
    limit1 = (v16u8)__msa_fill_b(*limit1_ptr);
    limit0 = (v16u8)__msa_ilvr_d((v2i64)limit1, (v2i64)limit0);

    LPF_MASK_HEV(p3, p2, p1, p0, q0, q1, q2, q3, limit0, b_limit0, thresh0,
                 hev, mask, flat);
    VP8_LPF_FILTER4_4W(p1, p0, q0, q1, mask, hev);

    ST_UB4(p1, p0, q0, q1, (src - 2 * pitch), pitch);
}

static void loop_filter_vertical_4_dual_msa(uint8_t *src, int32_t pitch,
                                            const uint8_t *b_limit0_ptr,
                                            const uint8_t *limit0_ptr,
                                            const uint8_t *thresh0_ptr,
                                            const uint8_t *b_limit1_ptr,
                                            const uint8_t *limit1_ptr,
                                            const uint8_t *thresh1_ptr)
{
    v16u8 mask, hev, flat;
    v16u8 thresh0, b_limit0, limit0, thresh1, b_limit1, limit1;
    v16u8 p3, p2, p1, p0, q3, q2, q1, q0;
    v16u8 row0, row1, row2, row3, row4, row5, row6, row7;
    v16u8 row8, row9, row10, row11, row12, row13, row14, row15;
    v8i16 tmp0, tmp1, tmp2, tmp3, tmp4, tmp5;

    LD_UB8(src - 4, pitch, row0, row1, row2, row3, row4, row5, row6, row7);
    LD_UB8(src - 4 + (8 * pitch), pitch,
           row8, row9, row10, row11, row12, row13, row14, row15);
    TRANSPOSE16x8_UB_UB(row0, row1, row2, row3, row4, row5, row6, row7,
                        row8, row9, row10, row11, row12, row13, row14, row15,
                        p3, p2, p1, p0, q0, q1, q2, q3);

    thresh0 = (v16u8)__msa_fill_b(*thresh0_ptr);
    thresh1 = (v16u8)__msa_fill_b(*thresh1_ptr);
    thresh0 = (v16u8)__msa_ilvr_d((v2i64)thresh1, (v2i64)thresh0);

    b_limit0 = (v16u8)__msa_fill_b(*b_limit0_ptr);
    b_limit1 = (v16u8)__msa_fill_b(*b_limit1_ptr);
    b_limit0 = (v16u8)__msa_ilvr_d((v2i64)b_limit1, (v2i64)b_limit0);

    limit0 = (v16u8)__msa_fill_b(*limit0_ptr);
    limit1 = (v16u8)__msa_fill_b(*limit1_ptr);
    limit0 = (v16u8)__msa_ilvr_d((v2i64)limit1, (v2i64)limit0);

    LPF_MASK_HEV(p3, p2, p1, p0, q0, q1, q2, q3, limit0, b_limit0, thresh0,
                 hev, mask, flat);
    VP8_LPF_FILTER4_4W(p1, p0, q0, q1, mask, hev);
    ILVR_B2_SH(p0, p1, q1, q0, tmp0, tmp1);
    ILVRL_H2_SH(tmp1, tmp0, tmp2, tmp3);
    ILVL_B2_SH(p0, p1, q1, q0, tmp0, tmp1);
    ILVRL_H2_SH(tmp1, tmp0, tmp4, tmp5);

    src -= 2;
    ST4x8_UB(tmp2, tmp3, src, pitch);
    src += (8 * pitch);
    ST4x8_UB(tmp4, tmp5, src, pitch);
}

static void mbloop_filter_horizontal_edge_y_msa(uint8_t *src, int32_t pitch,
                                                const uint8_t b_limit_in,
                                                const uint8_t limit_in,
                                                const uint8_t thresh_in)
{
    uint8_t *temp_src;
    v16u8 p3, p2, p1, p0, q3, q2, q1, q0;
    v16u8 mask, hev, flat, thresh, limit, b_limit;

    b_limit = (v16u8)__msa_fill_b(b_limit_in);
    limit = (v16u8)__msa_fill_b(limit_in);
    thresh = (v16u8)__msa_fill_b(thresh_in);
    temp_src = src - (pitch << 2);
    LD_UB8(temp_src, pitch, p3, p2, p1, p0, q0, q1, q2, q3);
    LPF_MASK_HEV(p3, p2, p1, p0, q0, q1, q2, q3, limit, b_limit, thresh,
                 hev, mask, flat);
    VP8_MBFILTER(p2, p1, p0, q0, q1, q2, mask, hev);
    temp_src = src - 3 * pitch;
    ST_UB4(p2, p1, p0, q0, temp_src, pitch);
    temp_src += (4 * pitch);
    ST_UB2(q1, q2, temp_src, pitch);
}

static void mbloop_filter_horizontal_edge_uv_msa(uint8_t *src_u, uint8_t *src_v,
|
||||
int32_t pitch,
|
||||
const uint8_t b_limit_in,
|
||||
const uint8_t limit_in,
|
||||
const uint8_t thresh_in)
|
||||
{
|
||||
uint8_t *temp_src;
|
||||
uint64_t p2_d, p1_d, p0_d, q0_d, q1_d, q2_d;
|
||||
v16u8 p3, p2, p1, p0, q3, q2, q1, q0;
|
||||
v16u8 mask, hev, flat, thresh, limit, b_limit;
|
||||
v16u8 p3_u, p2_u, p1_u, p0_u, q3_u, q2_u, q1_u, q0_u;
|
||||
v16u8 p3_v, p2_v, p1_v, p0_v, q3_v, q2_v, q1_v, q0_v;
|
||||
|
||||
b_limit = (v16u8)__msa_fill_b(b_limit_in);
|
||||
limit = (v16u8)__msa_fill_b(limit_in);
|
||||
thresh = (v16u8)__msa_fill_b(thresh_in);
|
||||
|
||||
temp_src = src_u - (pitch << 2);
|
||||
LD_UB8(temp_src, pitch, p3_u, p2_u, p1_u, p0_u, q0_u, q1_u, q2_u, q3_u);
|
||||
temp_src = src_v - (pitch << 2);
|
||||
LD_UB8(temp_src, pitch, p3_v, p2_v, p1_v, p0_v, q0_v, q1_v, q2_v, q3_v);
|
||||
|
||||
ILVR_D4_UB(p3_v, p3_u, p2_v, p2_u, p1_v, p1_u, p0_v, p0_u, p3, p2, p1, p0);
|
||||
ILVR_D4_UB(q0_v, q0_u, q1_v, q1_u, q2_v, q2_u, q3_v, q3_u, q0, q1, q2, q3);
|
||||
LPF_MASK_HEV(p3, p2, p1, p0, q0, q1, q2, q3, limit, b_limit, thresh,
|
||||
hev, mask, flat);
|
||||
VP8_MBFILTER(p2, p1, p0, q0, q1, q2, mask, hev);
|
||||
|
||||
p2_d = __msa_copy_u_d((v2i64)p2, 0);
|
||||
p1_d = __msa_copy_u_d((v2i64)p1, 0);
|
||||
p0_d = __msa_copy_u_d((v2i64)p0, 0);
|
||||
q0_d = __msa_copy_u_d((v2i64)q0, 0);
|
||||
q1_d = __msa_copy_u_d((v2i64)q1, 0);
|
||||
q2_d = __msa_copy_u_d((v2i64)q2, 0);
|
||||
src_u -= (pitch * 3);
|
||||
SD4(p2_d, p1_d, p0_d, q0_d, src_u, pitch);
|
||||
src_u += 4 * pitch;
|
||||
SD(q1_d, src_u);
|
||||
src_u += pitch;
|
||||
SD(q2_d, src_u);
|
||||
|
||||
p2_d = __msa_copy_u_d((v2i64)p2, 1);
|
||||
p1_d = __msa_copy_u_d((v2i64)p1, 1);
|
||||
p0_d = __msa_copy_u_d((v2i64)p0, 1);
|
||||
q0_d = __msa_copy_u_d((v2i64)q0, 1);
|
||||
q1_d = __msa_copy_u_d((v2i64)q1, 1);
|
||||
q2_d = __msa_copy_u_d((v2i64)q2, 1);
|
||||
src_v -= (pitch * 3);
|
||||
SD4(p2_d, p1_d, p0_d, q0_d, src_v, pitch);
|
||||
src_v += 4 * pitch;
|
||||
SD(q1_d, src_v);
|
||||
src_v += pitch;
|
||||
SD(q2_d, src_v);
|
||||
}
|
||||
|
||||
static void mbloop_filter_vertical_edge_y_msa(uint8_t *src, int32_t pitch,
|
||||
const uint8_t b_limit_in,
|
||||
const uint8_t limit_in,
|
||||
const uint8_t thresh_in)
|
||||
{
|
||||
uint8_t *temp_src;
|
||||
v16u8 p3, p2, p1, p0, q3, q2, q1, q0;
|
||||
v16u8 mask, hev, flat, thresh, limit, b_limit;
|
||||
v16u8 row0, row1, row2, row3, row4, row5, row6, row7, row8;
|
||||
v16u8 row9, row10, row11, row12, row13, row14, row15;
|
||||
v8i16 tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
|
||||
|
||||
b_limit = (v16u8)__msa_fill_b(b_limit_in);
|
||||
limit = (v16u8)__msa_fill_b(limit_in);
|
||||
thresh = (v16u8)__msa_fill_b(thresh_in);
|
||||
temp_src = src - 4;
|
||||
LD_UB8(temp_src, pitch, row0, row1, row2, row3, row4, row5, row6, row7);
|
||||
temp_src += (8 * pitch);
|
||||
LD_UB8(temp_src, pitch,
|
||||
row8, row9, row10, row11, row12, row13, row14, row15);
|
||||
TRANSPOSE16x8_UB_UB(row0, row1, row2, row3, row4, row5, row6, row7,
|
||||
row8, row9, row10, row11, row12, row13, row14, row15,
|
||||
p3, p2, p1, p0, q0, q1, q2, q3);
|
||||
|
||||
LPF_MASK_HEV(p3, p2, p1, p0, q0, q1, q2, q3, limit, b_limit, thresh,
|
||||
hev, mask, flat);
|
||||
VP8_MBFILTER(p2, p1, p0, q0, q1, q2, mask, hev);
|
||||
ILVR_B2_SH(p1, p2, q0, p0, tmp0, tmp1);
|
||||
ILVRL_H2_SH(tmp1, tmp0, tmp3, tmp4);
|
||||
ILVL_B2_SH(p1, p2, q0, p0, tmp0, tmp1);
|
||||
ILVRL_H2_SH(tmp1, tmp0, tmp6, tmp7);
|
||||
ILVRL_B2_SH(q2, q1, tmp2, tmp5);
|
||||
|
||||
temp_src = src - 3;
|
||||
VP8_ST6x1_UB(tmp3, 0, tmp2, 0, temp_src, 4);
|
||||
temp_src += pitch;
|
||||
VP8_ST6x1_UB(tmp3, 1, tmp2, 1, temp_src, 4);
|
||||
temp_src += pitch;
|
||||
VP8_ST6x1_UB(tmp3, 2, tmp2, 2, temp_src, 4);
|
||||
temp_src += pitch;
|
||||
VP8_ST6x1_UB(tmp3, 3, tmp2, 3, temp_src, 4);
|
||||
temp_src += pitch;
|
||||
VP8_ST6x1_UB(tmp4, 0, tmp2, 4, temp_src, 4);
|
||||
temp_src += pitch;
|
||||
VP8_ST6x1_UB(tmp4, 1, tmp2, 5, temp_src, 4);
|
||||
temp_src += pitch;
|
||||
VP8_ST6x1_UB(tmp4, 2, tmp2, 6, temp_src, 4);
|
||||
temp_src += pitch;
|
||||
VP8_ST6x1_UB(tmp4, 3, tmp2, 7, temp_src, 4);
|
||||
temp_src += pitch;
|
||||
VP8_ST6x1_UB(tmp6, 0, tmp5, 0, temp_src, 4);
|
||||
temp_src += pitch;
|
||||
VP8_ST6x1_UB(tmp6, 1, tmp5, 1, temp_src, 4);
|
||||
temp_src += pitch;
|
||||
VP8_ST6x1_UB(tmp6, 2, tmp5, 2, temp_src, 4);
|
||||
temp_src += pitch;
|
||||
VP8_ST6x1_UB(tmp6, 3, tmp5, 3, temp_src, 4);
|
||||
temp_src += pitch;
|
||||
VP8_ST6x1_UB(tmp7, 0, tmp5, 4, temp_src, 4);
|
||||
temp_src += pitch;
|
||||
VP8_ST6x1_UB(tmp7, 1, tmp5, 5, temp_src, 4);
|
||||
temp_src += pitch;
|
||||
VP8_ST6x1_UB(tmp7, 2, tmp5, 6, temp_src, 4);
|
||||
temp_src += pitch;
|
||||
VP8_ST6x1_UB(tmp7, 3, tmp5, 7, temp_src, 4);
|
||||
}
|
||||
|
||||
static void mbloop_filter_vertical_edge_uv_msa(uint8_t *src_u, uint8_t *src_v,
|
||||
int32_t pitch,
|
||||
const uint8_t b_limit_in,
|
||||
const uint8_t limit_in,
|
||||
const uint8_t thresh_in)
|
||||
{
|
||||
v16u8 p3, p2, p1, p0, q3, q2, q1, q0;
|
||||
v16u8 mask, hev, flat, thresh, limit, b_limit;
|
||||
v16u8 row0, row1, row2, row3, row4, row5, row6, row7, row8;
|
||||
v16u8 row9, row10, row11, row12, row13, row14, row15;
|
||||
v8i16 tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
|
||||
|
||||
b_limit = (v16u8)__msa_fill_b(b_limit_in);
|
||||
limit = (v16u8)__msa_fill_b(limit_in);
|
||||
thresh = (v16u8)__msa_fill_b(thresh_in);
|
||||
|
||||
LD_UB8(src_u - 4, pitch, row0, row1, row2, row3, row4, row5, row6, row7);
|
||||
LD_UB8(src_v - 4, pitch,
|
||||
row8, row9, row10, row11, row12, row13, row14, row15);
|
||||
TRANSPOSE16x8_UB_UB(row0, row1, row2, row3, row4, row5, row6, row7,
|
||||
row8, row9, row10, row11, row12, row13, row14, row15,
|
||||
p3, p2, p1, p0, q0, q1, q2, q3);
|
||||
|
||||
LPF_MASK_HEV(p3, p2, p1, p0, q0, q1, q2, q3, limit, b_limit, thresh,
|
||||
hev, mask, flat);
|
||||
VP8_MBFILTER(p2, p1, p0, q0, q1, q2, mask, hev);
|
||||
|
||||
ILVR_B2_SH(p1, p2, q0, p0, tmp0, tmp1);
|
||||
ILVRL_H2_SH(tmp1, tmp0, tmp3, tmp4);
|
||||
ILVL_B2_SH(p1, p2, q0, p0, tmp0, tmp1);
|
||||
ILVRL_H2_SH(tmp1, tmp0, tmp6, tmp7);
|
||||
ILVRL_B2_SH(q2, q1, tmp2, tmp5);
|
||||
|
||||
src_u -= 3;
|
||||
VP8_ST6x1_UB(tmp3, 0, tmp2, 0, src_u, 4);
|
||||
src_u += pitch;
|
||||
VP8_ST6x1_UB(tmp3, 1, tmp2, 1, src_u, 4);
|
||||
src_u += pitch;
|
||||
VP8_ST6x1_UB(tmp3, 2, tmp2, 2, src_u, 4);
|
||||
src_u += pitch;
|
||||
VP8_ST6x1_UB(tmp3, 3, tmp2, 3, src_u, 4);
|
||||
src_u += pitch;
|
||||
VP8_ST6x1_UB(tmp4, 0, tmp2, 4, src_u, 4);
|
||||
src_u += pitch;
|
||||
VP8_ST6x1_UB(tmp4, 1, tmp2, 5, src_u, 4);
|
||||
src_u += pitch;
|
||||
VP8_ST6x1_UB(tmp4, 2, tmp2, 6, src_u, 4);
|
||||
src_u += pitch;
|
||||
VP8_ST6x1_UB(tmp4, 3, tmp2, 7, src_u, 4);
|
||||
|
||||
src_v -= 3;
|
||||
VP8_ST6x1_UB(tmp6, 0, tmp5, 0, src_v, 4);
|
||||
src_v += pitch;
|
||||
VP8_ST6x1_UB(tmp6, 1, tmp5, 1, src_v, 4);
|
||||
src_v += pitch;
|
||||
VP8_ST6x1_UB(tmp6, 2, tmp5, 2, src_v, 4);
|
||||
src_v += pitch;
|
||||
VP8_ST6x1_UB(tmp6, 3, tmp5, 3, src_v, 4);
|
||||
src_v += pitch;
|
||||
VP8_ST6x1_UB(tmp7, 0, tmp5, 4, src_v, 4);
|
||||
src_v += pitch;
|
||||
VP8_ST6x1_UB(tmp7, 1, tmp5, 5, src_v, 4);
|
||||
src_v += pitch;
|
||||
VP8_ST6x1_UB(tmp7, 2, tmp5, 6, src_v, 4);
|
||||
src_v += pitch;
|
||||
VP8_ST6x1_UB(tmp7, 3, tmp5, 7, src_v, 4);
|
||||
}
|
||||
|
||||
void vp8_loop_filter_simple_horizontal_edge_msa(uint8_t *src, int32_t pitch,
|
||||
const uint8_t *b_limit_ptr)
|
||||
{
|
||||
v16u8 p1, p0, q1, q0;
|
||||
v16u8 mask, b_limit;
|
||||
|
||||
b_limit = (v16u8)__msa_fill_b(*b_limit_ptr);
|
||||
LD_UB4(src - (pitch << 1), pitch, p1, p0, q0, q1);
|
||||
VP8_SIMPLE_MASK(p1, p0, q0, q1, b_limit, mask);
|
||||
VP8_SIMPLE_FILT(p1, p0, q0, q1, mask);
|
||||
ST_UB2(p0, q0, (src - pitch), pitch);
|
||||
}
|
||||
|
||||
void vp8_loop_filter_simple_vertical_edge_msa(uint8_t *src, int32_t pitch,
|
||||
const uint8_t *b_limit_ptr)
|
||||
{
|
||||
uint8_t *temp_src;
|
||||
v16u8 p1, p0, q1, q0;
|
||||
v16u8 mask, b_limit;
|
||||
v16u8 row0, row1, row2, row3, row4, row5, row6, row7, row8;
|
||||
v16u8 row9, row10, row11, row12, row13, row14, row15;
|
||||
v8i16 tmp0, tmp1;
|
||||
|
||||
b_limit = (v16u8)__msa_fill_b(*b_limit_ptr);
|
||||
temp_src = src - 2;
|
||||
LD_UB8(temp_src, pitch, row0, row1, row2, row3, row4, row5, row6, row7);
|
||||
temp_src += (8 * pitch);
|
||||
LD_UB8(temp_src, pitch,
|
||||
row8, row9, row10, row11, row12, row13, row14, row15);
|
||||
TRANSPOSE16x4_UB_UB(row0, row1, row2, row3, row4, row5, row6, row7,
|
||||
row8, row9, row10, row11, row12, row13, row14, row15,
|
||||
p1, p0, q0, q1);
|
||||
VP8_SIMPLE_MASK(p1, p0, q0, q1, b_limit, mask);
|
||||
VP8_SIMPLE_FILT(p1, p0, q0, q1, mask);
|
||||
ILVRL_B2_SH(q0, p0, tmp1, tmp0);
|
||||
|
||||
src -= 1;
|
||||
ST2x4_UB(tmp1, 0, src, pitch);
|
||||
src += 4 * pitch;
|
||||
ST2x4_UB(tmp1, 4, src, pitch);
|
||||
src += 4 * pitch;
|
||||
ST2x4_UB(tmp0, 0, src, pitch);
|
||||
src += 4 * pitch;
|
||||
ST2x4_UB(tmp0, 4, src, pitch);
|
||||
src += 4 * pitch;
|
||||
}
|
||||
|
||||
static void loop_filter_horizontal_edge_uv_msa(uint8_t *src_u, uint8_t *src_v,
|
||||
int32_t pitch,
|
||||
const uint8_t b_limit_in,
|
||||
const uint8_t limit_in,
|
||||
const uint8_t thresh_in)
|
||||
{
|
||||
uint64_t p1_d, p0_d, q0_d, q1_d;
|
||||
v16u8 p3, p2, p1, p0, q3, q2, q1, q0;
|
||||
v16u8 mask, hev, flat, thresh, limit, b_limit;
|
||||
v16u8 p3_u, p2_u, p1_u, p0_u, q3_u, q2_u, q1_u, q0_u;
|
||||
v16u8 p3_v, p2_v, p1_v, p0_v, q3_v, q2_v, q1_v, q0_v;
|
||||
|
||||
thresh = (v16u8)__msa_fill_b(thresh_in);
|
||||
limit = (v16u8)__msa_fill_b(limit_in);
|
||||
b_limit = (v16u8)__msa_fill_b(b_limit_in);
|
||||
|
||||
src_u = src_u - (pitch << 2);
|
||||
LD_UB8(src_u, pitch, p3_u, p2_u, p1_u, p0_u, q0_u, q1_u, q2_u, q3_u);
|
||||
src_u += (5 * pitch);
|
||||
src_v = src_v - (pitch << 2);
|
||||
LD_UB8(src_v, pitch, p3_v, p2_v, p1_v, p0_v, q0_v, q1_v, q2_v, q3_v);
|
||||
src_v += (5 * pitch);
|
||||
|
||||
/* right 8 element of p3 are u pixel and
|
||||
left 8 element of p3 are v pixel */
|
||||
ILVR_D4_UB(p3_v, p3_u, p2_v, p2_u, p1_v, p1_u, p0_v, p0_u, p3, p2, p1, p0);
|
||||
ILVR_D4_UB(q0_v, q0_u, q1_v, q1_u, q2_v, q2_u, q3_v, q3_u, q0, q1, q2, q3);
|
||||
LPF_MASK_HEV(p3, p2, p1, p0, q0, q1, q2, q3, limit, b_limit, thresh,
|
||||
hev, mask, flat);
|
||||
VP8_LPF_FILTER4_4W(p1, p0, q0, q1, mask, hev);
|
||||
|
||||
p1_d = __msa_copy_u_d((v2i64)p1, 0);
|
||||
p0_d = __msa_copy_u_d((v2i64)p0, 0);
|
||||
q0_d = __msa_copy_u_d((v2i64)q0, 0);
|
||||
q1_d = __msa_copy_u_d((v2i64)q1, 0);
|
||||
SD4(q1_d, q0_d, p0_d, p1_d, src_u, (- pitch));
|
||||
|
||||
p1_d = __msa_copy_u_d((v2i64)p1, 1);
|
||||
p0_d = __msa_copy_u_d((v2i64)p0, 1);
|
||||
q0_d = __msa_copy_u_d((v2i64)q0, 1);
|
||||
q1_d = __msa_copy_u_d((v2i64)q1, 1);
|
||||
SD4(q1_d, q0_d, p0_d, p1_d, src_v, (- pitch));
|
||||
}
|
||||
|
||||
static void loop_filter_vertical_edge_uv_msa(uint8_t *src_u, uint8_t *src_v,
|
||||
int32_t pitch,
|
||||
const uint8_t b_limit_in,
|
||||
const uint8_t limit_in,
|
||||
const uint8_t thresh_in)
|
||||
{
|
||||
uint8_t *temp_src_u, *temp_src_v;
|
||||
v16u8 p3, p2, p1, p0, q3, q2, q1, q0;
|
||||
v16u8 mask, hev, flat, thresh, limit, b_limit;
|
||||
v16u8 row0, row1, row2, row3, row4, row5, row6, row7, row8;
|
||||
v16u8 row9, row10, row11, row12, row13, row14, row15;
|
||||
v4i32 tmp0, tmp1, tmp2, tmp3, tmp4, tmp5;
|
||||
|
||||
thresh = (v16u8)__msa_fill_b(thresh_in);
|
||||
limit = (v16u8)__msa_fill_b(limit_in);
|
||||
b_limit = (v16u8)__msa_fill_b(b_limit_in);
|
||||
|
||||
LD_UB8(src_u - 4, pitch, row0, row1, row2, row3, row4, row5, row6, row7);
|
||||
LD_UB8(src_v - 4, pitch,
|
||||
row8, row9, row10, row11, row12, row13, row14, row15);
|
||||
TRANSPOSE16x8_UB_UB(row0, row1, row2, row3, row4, row5, row6, row7,
|
||||
row8, row9, row10, row11, row12, row13, row14, row15,
|
||||
p3, p2, p1, p0, q0, q1, q2, q3);
|
||||
|
||||
LPF_MASK_HEV(p3, p2, p1, p0, q0, q1, q2, q3, limit, b_limit, thresh,
|
||||
hev, mask, flat);
|
||||
VP8_LPF_FILTER4_4W(p1, p0, q0, q1, mask, hev);
|
||||
ILVR_B2_SW(p0, p1, q1, q0, tmp0, tmp1);
|
||||
ILVRL_H2_SW(tmp1, tmp0, tmp2, tmp3);
|
||||
tmp0 = (v4i32)__msa_ilvl_b((v16i8)p0, (v16i8)p1);
|
||||
tmp1 = (v4i32)__msa_ilvl_b((v16i8)q1, (v16i8)q0);
|
||||
ILVRL_H2_SW(tmp1, tmp0, tmp4, tmp5);
|
||||
|
||||
temp_src_u = src_u - 2;
|
||||
ST4x4_UB(tmp2, tmp2, 0, 1, 2, 3, temp_src_u, pitch);
|
||||
temp_src_u += 4 * pitch;
|
||||
ST4x4_UB(tmp3, tmp3, 0, 1, 2, 3, temp_src_u, pitch);
|
||||
|
||||
temp_src_v = src_v - 2;
|
||||
ST4x4_UB(tmp4, tmp4, 0, 1, 2, 3, temp_src_v, pitch);
|
||||
temp_src_v += 4 * pitch;
|
||||
ST4x4_UB(tmp5, tmp5, 0, 1, 2, 3, temp_src_v, pitch);
|
||||
}
|
||||
|
||||
void vp8_loop_filter_mbh_msa(uint8_t *src_y, uint8_t *src_u,
|
||||
uint8_t *src_v, int32_t pitch_y,
|
||||
int32_t pitch_u_v,
|
||||
loop_filter_info *lpf_info_ptr)
|
||||
{
|
||||
mbloop_filter_horizontal_edge_y_msa(src_y, pitch_y,
|
||||
*lpf_info_ptr->mblim,
|
||||
*lpf_info_ptr->lim,
|
||||
*lpf_info_ptr->hev_thr);
|
||||
if (src_u)
|
||||
{
|
||||
mbloop_filter_horizontal_edge_uv_msa(src_u, src_v, pitch_u_v,
|
||||
*lpf_info_ptr->mblim,
|
||||
*lpf_info_ptr->lim,
|
||||
*lpf_info_ptr->hev_thr);
|
||||
}
|
||||
}
|
||||
|
||||
void vp8_loop_filter_mbv_msa(uint8_t *src_y, uint8_t *src_u,
|
||||
uint8_t *src_v, int32_t pitch_y,
|
||||
int32_t pitch_u_v,
|
||||
loop_filter_info *lpf_info_ptr)
|
||||
{
|
||||
mbloop_filter_vertical_edge_y_msa(src_y, pitch_y,
|
||||
*lpf_info_ptr->mblim,
|
||||
*lpf_info_ptr->lim,
|
||||
*lpf_info_ptr->hev_thr);
|
||||
if (src_u)
|
||||
{
|
||||
mbloop_filter_vertical_edge_uv_msa(src_u, src_v, pitch_u_v,
|
||||
*lpf_info_ptr->mblim,
|
||||
*lpf_info_ptr->lim,
|
||||
*lpf_info_ptr->hev_thr);
|
||||
}
|
||||
}
|
||||
|
||||
void vp8_loop_filter_bh_msa(uint8_t *src_y, uint8_t *src_u,
|
||||
uint8_t *src_v, int32_t pitch_y,
|
||||
int32_t pitch_u_v,
|
||||
loop_filter_info *lpf_info_ptr)
|
||||
{
|
||||
loop_filter_horizontal_4_dual_msa(src_y + 4 * pitch_y, pitch_y,
|
||||
lpf_info_ptr->blim,
|
||||
lpf_info_ptr->lim,
|
||||
lpf_info_ptr->hev_thr,
|
||||
lpf_info_ptr->blim,
|
||||
lpf_info_ptr->lim,
|
||||
lpf_info_ptr->hev_thr);
|
||||
loop_filter_horizontal_4_dual_msa(src_y + 8 * pitch_y, pitch_y,
|
||||
lpf_info_ptr->blim,
|
||||
lpf_info_ptr->lim,
|
||||
lpf_info_ptr->hev_thr,
|
||||
lpf_info_ptr->blim,
|
||||
lpf_info_ptr->lim,
|
||||
lpf_info_ptr->hev_thr);
|
||||
loop_filter_horizontal_4_dual_msa(src_y + 12 * pitch_y, pitch_y,
|
||||
lpf_info_ptr->blim,
|
||||
lpf_info_ptr->lim,
|
||||
lpf_info_ptr->hev_thr,
|
||||
lpf_info_ptr->blim,
|
||||
lpf_info_ptr->lim,
|
||||
lpf_info_ptr->hev_thr);
|
||||
if (src_u)
|
||||
{
|
||||
loop_filter_horizontal_edge_uv_msa(src_u + (4 * pitch_u_v),
|
||||
src_v + (4 * pitch_u_v),
|
||||
pitch_u_v,
|
||||
*lpf_info_ptr->blim,
|
||||
*lpf_info_ptr->lim,
|
||||
*lpf_info_ptr->hev_thr);
|
||||
}
|
||||
}
|
||||
|
||||
void vp8_loop_filter_bv_msa(uint8_t *src_y, uint8_t *src_u,
|
||||
uint8_t *src_v, int32_t pitch_y,
|
||||
int32_t pitch_u_v,
|
||||
loop_filter_info *lpf_info_ptr)
|
||||
{
|
||||
loop_filter_vertical_4_dual_msa(src_y + 4, pitch_y, lpf_info_ptr->blim,
|
||||
lpf_info_ptr->lim,
|
||||
lpf_info_ptr->hev_thr,
|
||||
lpf_info_ptr->blim,
|
||||
lpf_info_ptr->lim,
|
||||
lpf_info_ptr->hev_thr);
|
||||
loop_filter_vertical_4_dual_msa(src_y + 8, pitch_y,
|
||||
lpf_info_ptr->blim,
|
||||
lpf_info_ptr->lim,
|
||||
lpf_info_ptr->hev_thr,
|
||||
lpf_info_ptr->blim,
|
||||
lpf_info_ptr->lim,
|
||||
lpf_info_ptr->hev_thr);
|
||||
loop_filter_vertical_4_dual_msa(src_y + 12, pitch_y,
|
||||
lpf_info_ptr->blim,
|
||||
lpf_info_ptr->lim,
|
||||
lpf_info_ptr->hev_thr,
|
||||
lpf_info_ptr->blim,
|
||||
lpf_info_ptr->lim,
|
||||
lpf_info_ptr->hev_thr);
|
||||
if (src_u)
|
||||
{
|
||||
loop_filter_vertical_edge_uv_msa(src_u + 4, src_v + 4, pitch_u_v,
|
||||
*lpf_info_ptr->blim,
|
||||
*lpf_info_ptr->lim,
|
||||
*lpf_info_ptr->hev_thr);
|
||||
}
|
||||
}
|
||||
|
||||
void vp8_loop_filter_bhs_msa(uint8_t *src_y, int32_t pitch_y,
|
||||
const uint8_t *b_limit_ptr)
|
||||
{
|
||||
vp8_loop_filter_simple_horizontal_edge_msa(src_y + (4 * pitch_y),
|
||||
pitch_y, b_limit_ptr);
|
||||
vp8_loop_filter_simple_horizontal_edge_msa(src_y + (8 * pitch_y),
|
||||
pitch_y, b_limit_ptr);
|
||||
vp8_loop_filter_simple_horizontal_edge_msa(src_y + (12 * pitch_y),
|
||||
pitch_y, b_limit_ptr);
|
||||
}
|
||||
|
||||
void vp8_loop_filter_bvs_msa(uint8_t *src_y, int32_t pitch_y,
|
||||
const uint8_t *b_limit_ptr)
|
||||
{
|
||||
vp8_loop_filter_simple_vertical_edge_msa(src_y + 4, pitch_y, b_limit_ptr);
|
||||
vp8_loop_filter_simple_vertical_edge_msa(src_y + 8, pitch_y, b_limit_ptr);
|
||||
vp8_loop_filter_simple_vertical_edge_msa(src_y + 12, pitch_y, b_limit_ptr);
|
||||
}
|
||||
@@ -1,146 +0,0 @@
|
||||
/*
|
||||
* Copyright (c) 2015 The WebM project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#include "./vp8_rtcd.h"
|
||||
#include "vp8/common/postproc.h"
|
||||
#include "vp8/common/mips/msa/vp8_macros_msa.h"
|
||||
|
||||
static void filter_by_weight8x8_msa(uint8_t *src_ptr, int32_t src_stride,
|
||||
uint8_t *dst_ptr, int32_t dst_stride,
|
||||
int32_t src_weight)
|
||||
{
|
||||
int32_t dst_weight = (1 << MFQE_PRECISION) - src_weight;
|
||||
int32_t row;
|
||||
uint64_t src0_d, src1_d, dst0_d, dst1_d;
|
||||
v16i8 src0 = { 0 };
|
||||
v16i8 src1 = { 0 };
|
||||
v16i8 dst0 = { 0 };
|
||||
v16i8 dst1 = { 0 };
|
||||
v8i16 src_wt, dst_wt, res_h_r, res_h_l, src_r, src_l, dst_r, dst_l;
|
||||
|
||||
src_wt = __msa_fill_h(src_weight);
|
||||
dst_wt = __msa_fill_h(dst_weight);
|
||||
|
||||
for (row = 2; row--;)
|
||||
{
|
||||
LD2(src_ptr, src_stride, src0_d, src1_d);
|
||||
src_ptr += (2 * src_stride);
|
||||
LD2(dst_ptr, dst_stride, dst0_d, dst1_d);
|
||||
INSERT_D2_SB(src0_d, src1_d, src0);
|
||||
INSERT_D2_SB(dst0_d, dst1_d, dst0);
|
||||
|
||||
LD2(src_ptr, src_stride, src0_d, src1_d);
|
||||
src_ptr += (2 * src_stride);
|
||||
LD2((dst_ptr + 2 * dst_stride), dst_stride, dst0_d, dst1_d);
|
||||
INSERT_D2_SB(src0_d, src1_d, src1);
|
||||
INSERT_D2_SB(dst0_d, dst1_d, dst1);
|
||||
|
||||
UNPCK_UB_SH(src0, src_r, src_l);
|
||||
UNPCK_UB_SH(dst0, dst_r, dst_l);
|
||||
res_h_r = (src_r * src_wt);
|
||||
res_h_r += (dst_r * dst_wt);
|
||||
res_h_l = (src_l * src_wt);
|
||||
res_h_l += (dst_l * dst_wt);
|
||||
SRARI_H2_SH(res_h_r, res_h_l, MFQE_PRECISION);
|
||||
dst0 = (v16i8)__msa_pckev_b((v16i8)res_h_l, (v16i8)res_h_r);
|
||||
ST8x2_UB(dst0, dst_ptr, dst_stride);
|
||||
dst_ptr += (2 * dst_stride);
|
||||
|
||||
UNPCK_UB_SH(src1, src_r, src_l);
|
||||
UNPCK_UB_SH(dst1, dst_r, dst_l);
|
||||
res_h_r = (src_r * src_wt);
|
||||
res_h_r += (dst_r * dst_wt);
|
||||
res_h_l = (src_l * src_wt);
|
||||
res_h_l += (dst_l * dst_wt);
|
||||
SRARI_H2_SH(res_h_r, res_h_l, MFQE_PRECISION);
|
||||
dst1 = (v16i8)__msa_pckev_b((v16i8)res_h_l, (v16i8)res_h_r);
|
||||
ST8x2_UB(dst1, dst_ptr, dst_stride);
|
||||
dst_ptr += (2 * dst_stride);
|
||||
}
|
||||
}
|
||||
|
||||
static void filter_by_weight16x16_msa(uint8_t *src_ptr, int32_t src_stride,
|
||||
uint8_t *dst_ptr, int32_t dst_stride,
|
||||
int32_t src_weight)
|
||||
{
|
||||
int32_t dst_weight = (1 << MFQE_PRECISION) - src_weight;
|
||||
int32_t row;
|
||||
v16i8 src0, src1, src2, src3;
|
||||
v16i8 dst0, dst1, dst2, dst3;
|
||||
v8i16 src_wt, dst_wt;
|
||||
v8i16 res_h_r, res_h_l;
|
||||
v8i16 src_r, src_l, dst_r, dst_l;
|
||||
|
||||
src_wt = __msa_fill_h(src_weight);
|
||||
dst_wt = __msa_fill_h(dst_weight);
|
||||
|
||||
for (row = 4; row--;)
|
||||
{
|
||||
LD_SB4(src_ptr, src_stride, src0, src1, src2, src3);
|
||||
src_ptr += (4 * src_stride);
|
||||
LD_SB4(dst_ptr, dst_stride, dst0, dst1, dst2, dst3);
|
||||
|
||||
UNPCK_UB_SH(src0, src_r, src_l);
|
||||
UNPCK_UB_SH(dst0, dst_r, dst_l);
|
||||
res_h_r = (src_r * src_wt);
|
||||
res_h_r += (dst_r * dst_wt);
|
||||
res_h_l = (src_l * src_wt);
|
||||
res_h_l += (dst_l * dst_wt);
|
||||
SRARI_H2_SH(res_h_r, res_h_l, MFQE_PRECISION);
|
||||
PCKEV_ST_SB(res_h_r, res_h_l, dst_ptr);
|
||||
dst_ptr += dst_stride;
|
||||
|
||||
UNPCK_UB_SH(src1, src_r, src_l);
|
||||
UNPCK_UB_SH(dst1, dst_r, dst_l);
|
||||
res_h_r = (src_r * src_wt);
|
||||
res_h_r += (dst_r * dst_wt);
|
||||
res_h_l = (src_l * src_wt);
|
||||
res_h_l += (dst_l * dst_wt);
|
||||
SRARI_H2_SH(res_h_r, res_h_l, MFQE_PRECISION);
|
||||
PCKEV_ST_SB(res_h_r, res_h_l, dst_ptr);
|
||||
dst_ptr += dst_stride;
|
||||
|
||||
UNPCK_UB_SH(src2, src_r, src_l);
|
||||
UNPCK_UB_SH(dst2, dst_r, dst_l);
|
||||
res_h_r = (src_r * src_wt);
|
||||
res_h_r += (dst_r * dst_wt);
|
||||
res_h_l = (src_l * src_wt);
|
||||
res_h_l += (dst_l * dst_wt);
|
||||
SRARI_H2_SH(res_h_r, res_h_l, MFQE_PRECISION);
|
||||
PCKEV_ST_SB(res_h_r, res_h_l, dst_ptr);
|
||||
dst_ptr += dst_stride;
|
||||
|
||||
UNPCK_UB_SH(src3, src_r, src_l);
|
||||
UNPCK_UB_SH(dst3, dst_r, dst_l);
|
||||
res_h_r = (src_r * src_wt);
|
||||
res_h_r += (dst_r * dst_wt);
|
||||
res_h_l = (src_l * src_wt);
|
||||
res_h_l += (dst_l * dst_wt);
|
||||
SRARI_H2_SH(res_h_r, res_h_l, MFQE_PRECISION);
|
||||
PCKEV_ST_SB(res_h_r, res_h_l, dst_ptr);
|
||||
dst_ptr += dst_stride;
|
||||
}
|
||||
}
|
||||
|
||||
void vp8_filter_by_weight16x16_msa(uint8_t *src_ptr, int32_t src_stride,
|
||||
uint8_t *dst_ptr, int32_t dst_stride,
|
||||
int32_t src_weight)
|
||||
{
|
||||
filter_by_weight16x16_msa(src_ptr, src_stride, dst_ptr, dst_stride,
|
||||
src_weight);
|
||||
}
|
||||
|
||||
void vp8_filter_by_weight8x8_msa(uint8_t *src_ptr, int32_t src_stride,
|
||||
uint8_t *dst_ptr, int32_t dst_stride,
|
||||
int32_t src_weight)
|
||||
{
|
||||
filter_by_weight8x8_msa(src_ptr, src_stride, dst_ptr, dst_stride,
|
||||
src_weight);
|
||||
}
|
||||