diff --git a/test/test.mk b/test/test.mk
index bdde5064a..5228614ae 100644
--- a/test/test.mk
+++ b/test/test.mk
@@ -129,6 +129,7 @@ LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += fdct8x8_test.cc
 LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += variance_test.cc
 LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += vp9_subtract_test.cc
 LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += lpf_8_test.cc
+LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += vp9_avg_test.cc
 LIBVPX_TEST_SRCS-$(CONFIG_VP9) += vp9_intrapred_test.cc
 
 ifeq ($(CONFIG_VP9_ENCODER),yes)
diff --git a/test/vp9_avg_test.cc b/test/vp9_avg_test.cc
new file mode 100644
index 000000000..c2e472b5d
--- /dev/null
+++ b/test/vp9_avg_test.cc
@@ -0,0 +1,150 @@
+/*
+ *  Copyright (c) 2012 The WebM project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include <string.h>
+#include <limits.h>
+#include <stdio.h>
+
+#include "./vpx_config.h"
+#if CONFIG_VP9_ENCODER
+#include "./vp9_rtcd.h"
+#endif
+#include "vpx_mem/vpx_mem.h"
+
+#include "test/acm_random.h"
+#include "test/clear_system_state.h"
+#include "test/register_state_check.h"
+#include "test/util.h"
+#include "third_party/googletest/src/include/gtest/gtest.h"
+
+using libvpx_test::ACMRandom;
+
+namespace {
+class AverageTestBase : public ::testing::Test {
+ public:
+  AverageTestBase(int width, int height) : width_(width), height_(height) {}
+
+  static void SetUpTestCase() {
+    source_data_ = reinterpret_cast<uint8_t*>(
+        vpx_memalign(kDataAlignment, kDataBlockSize));
+  }
+
+  static void TearDownTestCase() {
+    vpx_free(source_data_);
+    source_data_ = NULL;
+  }
+
+  virtual void TearDown() {
+    libvpx_test::ClearSystemState();
+  }
+
+ protected:
+  // Handle blocks up to 4 blocks 64x64 with stride up to 128
+  static const int kDataAlignment = 16;
+  static const int kDataBlockSize = 64 * 128;
+
+  virtual void SetUp() {
+    source_stride_ = (width_ + 31) & ~31;
+    rnd_.Reset(ACMRandom::DeterministicSeed());
+  }
+
+  // Sum Pixels
+  unsigned int ReferenceAverage(const uint8_t* source, int pitch) {
+    unsigned int average = 0;
+    for (int h = 0; h < 8; ++h)
+      for (int w = 0; w < 8; ++w)
+        average += source[h * source_stride_ + w];
+    return ((average + 32) >> 6);
+  }
+
+  void FillConstant(uint8_t fill_constant) {
+    for (int i = 0; i < width_ * height_; ++i) {
+      source_data_[i] = fill_constant;
+    }
+  }
+
+  void FillRandom() {
+    for (int i = 0; i < width_ * height_; ++i) {
+      source_data_[i] = rnd_.Rand8();
+    }
+  }
+
+  int width_, height_;
+  static uint8_t* source_data_;
+  int source_stride_;
+
+  ACMRandom rnd_;
+};
+typedef unsigned int (*AverageFunction)(const uint8_t* s, int pitch);
+
+typedef std::tr1::tuple<int, int, int, AverageFunction> AvgFunc;
+
+class AverageTest
+    : public AverageTestBase,
+      public ::testing::WithParamInterface<AvgFunc> {
+ public:
+  AverageTest() : AverageTestBase(GET_PARAM(0), GET_PARAM(1)) {}
+
+ protected:
+  void CheckAverages() {
+    unsigned int expected = ReferenceAverage(source_data_+ GET_PARAM(2),
+                                             source_stride_);
+
+    ASM_REGISTER_STATE_CHECK(GET_PARAM(3)(source_data_+ GET_PARAM(2),
+                                          source_stride_));
+    unsigned int actual = GET_PARAM(3)(source_data_+ GET_PARAM(2),
+                                       source_stride_);
+
+    EXPECT_EQ(expected, actual);
+  }
+};
+
+
+uint8_t* AverageTestBase::source_data_ = NULL;
+
+TEST_P(AverageTest, MinValue) {
+  FillConstant(0);
+  CheckAverages();
+}
+
+TEST_P(AverageTest, MaxValue) {
+  FillConstant(255);
+  CheckAverages();
+}
+
+TEST_P(AverageTest, Random) {
+  // The reference frame, but not the source frame, may be unaligned for
+  // certain types of searches.
+  for (int i = 0; i < 1000; i++) {
+    FillRandom();
+    CheckAverages();
+  }
+}
+
+using std::tr1::make_tuple;
+
+INSTANTIATE_TEST_CASE_P(
+    C, AverageTest,
+    ::testing::Values(
+        make_tuple(16, 16, 1, &vp9_avg_8x8_c)));
+
+
+#if HAVE_SSE2
+INSTANTIATE_TEST_CASE_P(
+    SSE2, AverageTest,
+    ::testing::Values(
+        make_tuple(16, 16, 0, &vp9_avg_8x8_sse2),
+        make_tuple(16, 16, 5, &vp9_avg_8x8_sse2),
+        make_tuple(32, 32, 15, &vp9_avg_8x8_sse2)));
+
+#endif
+
+}  // namespace
diff --git a/vp9/common/vp9_rtcd_defs.pl b/vp9/common/vp9_rtcd_defs.pl
index 27ccf03e7..e3f2cf8e6 100644
--- a/vp9/common/vp9_rtcd_defs.pl
+++ b/vp9/common/vp9_rtcd_defs.pl
@@ -1110,6 +1110,10 @@ specialize qw/vp9_mse8x8/, "$sse2_x86inc";
 add_proto qw/unsigned int vp9_get_mb_ss/, "const int16_t *";
 specialize qw/vp9_get_mb_ss/, "$sse2_x86inc";
+
+add_proto qw/unsigned int vp9_avg_8x8/, "const uint8_t *, int p";
+specialize qw/vp9_avg_8x8/, "$sse2_x86inc";
+
 # ENCODEMB INVOKE
 add_proto qw/void vp9_subtract_block/, "int rows, int cols, int16_t *diff_ptr, ptrdiff_t diff_stride, const uint8_t *src_ptr, ptrdiff_t src_stride, const uint8_t *pred_ptr, ptrdiff_t pred_stride";
diff --git a/vp9/encoder/vp9_avg.c b/vp9/encoder/vp9_avg.c
new file mode 100644
index 000000000..22c6cc4fc
--- /dev/null
+++ b/vp9/encoder/vp9_avg.c
@@ -0,0 +1,19 @@
+/*
+ *  Copyright (c) 2014 The WebM project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+#include "vpx_ports/mem.h"
+
+unsigned int vp9_avg_8x8_c(const uint8_t *s, int p) {
+  int i, j;
+  int sum = 0;
+  for (i = 0; i < 8; ++i, s+=p)
+    for (j = 0; j < 8; sum += s[j], ++j) {}
+
+  return (sum + 32) >> 6;
+}
diff --git a/vp9/encoder/vp9_encodeframe.c b/vp9/encoder/vp9_encodeframe.c
index 81d5d592f..227676e1b 100644
--- a/vp9/encoder/vp9_encodeframe.c
+++ b/vp9/encoder/vp9_encodeframe.c
@@ -396,10 +396,10 @@ static int set_vt_partitioning(VP9_COMP *cpi,
   const int block_width = num_8x8_blocks_wide_lookup[bsize];
   const int block_height = num_8x8_blocks_high_lookup[bsize];
   // TODO(debargha): Choose this more intelligently.
-  const int64_t threshold_multiplier = 25;
-  int64_t threshold = threshold_multiplier * cpi->common.base_qindex;
+  const int64_t threshold_multiplier = cm->frame_type == KEY_FRAME ? 64 : 4;
+  int64_t threshold = threshold_multiplier *
+      vp9_convert_qindex_to_q(cm->base_qindex, cm->bit_depth);
   assert(block_height == block_width);
-
   tree_to_node(data, bsize, &vt);
 
   // Split none is available only if we have more than half a block size
@@ -511,10 +511,17 @@ static void choose_partitioning(VP9_COMP *cpi,
         int y_idx = y16_idx + ((k >> 1) << 3);
         unsigned int sse = 0;
         int sum = 0;
-        if (x_idx < pixels_wide && y_idx < pixels_high)
-          vp9_get8x8var(s + y_idx * sp + x_idx, sp,
-                        d + y_idx * dp + x_idx, dp, &sse, &sum);
-        fill_variance(sse, sum, 64, &vst->split[k].part_variances.none);
+
+        if (x_idx < pixels_wide && y_idx < pixels_high) {
+          int s_avg = vp9_avg_8x8(s + y_idx * sp + x_idx, sp);
+          int d_avg = vp9_avg_8x8(d + y_idx * dp + x_idx, dp);
+          sum = s_avg - d_avg;
+          sse = sum * sum;
+        }
+        // For an 8x8 block we have just one value the average of all 64
+        // pixels, so use 1.  This means of course that there is no variance
+        // in an 8x8 block.
+        fill_variance(sse, sum, 1, &vst->split[k].part_variances.none);
       }
     }
   }
@@ -530,8 +537,8 @@ static void choose_partitioning(VP9_COMP *cpi,
   // Now go through the entire structure, splitting every block size until
   // we get to one that's got a variance lower than our threshold, or we
   // hit 8x8.
-  if (!set_vt_partitioning(cpi, &vt, BLOCK_64X64,
-                           mi_row, mi_col)) {
+  if ( mi_col + 8 > cm->mi_cols || mi_row + 8 > cm->mi_rows ||
+      !set_vt_partitioning(cpi, &vt, BLOCK_64X64, mi_row, mi_col)) {
     for (i = 0; i < 4; ++i) {
       const int x32_idx = ((i & 1) << 2);
       const int y32_idx = ((i >> 1) << 2);
@@ -561,10 +568,10 @@ static void choose_partitioning(VP9_COMP *cpi,
           }
         }
 #else
-        if (!set_vt_partitioning(cpi, &vt.split[i].split[j], tile,
+        if (!set_vt_partitioning(cpi, &vt.split[i].split[j],
                                  BLOCK_16X16,
-                                 (mi_row + y32_idx + y16_idx),
-                                 (mi_col + x32_idx + x16_idx), 2)) {
+                                 mi_row + y32_idx + y16_idx,
+                                 mi_col + x32_idx + x16_idx)) {
           for (k = 0; k < 4; ++k) {
             const int x8_idx = (k & 1);
             const int y8_idx = (k >> 1);
@@ -2593,7 +2600,8 @@ static void encode_rd_sb_row(VP9_COMP *cpi, const TileInfo *const tile,
       set_fixed_partitioning(cpi, tile, mi, mi_row, mi_col, bsize);
       rd_use_partition(cpi, tile, mi, tp, mi_row, mi_col, BLOCK_64X64,
                        &dummy_rate, &dummy_dist, 1, cpi->pc_root);
-    } else if (sf->partition_search_type == VAR_BASED_PARTITION) {
+    } else if (sf->partition_search_type == VAR_BASED_PARTITION &&
+               cm->frame_type != KEY_FRAME ) {
       choose_partitioning(cpi, tile, mi_row, mi_col);
       rd_use_partition(cpi, tile, mi, tp, mi_row, mi_col, BLOCK_64X64,
                        &dummy_rate, &dummy_dist, 1, cpi->pc_root);
diff --git a/vp9/encoder/vp9_pickmode.c b/vp9/encoder/vp9_pickmode.c
index 428767a44..4efa22a89 100644
--- a/vp9/encoder/vp9_pickmode.c
+++ b/vp9/encoder/vp9_pickmode.c
@@ -235,6 +235,10 @@ static void model_rd_for_sb_y(VP9_COMP *cpi, BLOCK_SIZE bsize,
                     tx_mode_to_biggest_tx_size[cpi->common.tx_mode]);
     else
       xd->mi[0].src_mi->mbmi.tx_size = TX_8X8;
+
+    if (cpi->sf.partition_search_type == VAR_BASED_PARTITION &&
+        xd->mi[0].src_mi->mbmi.tx_size > TX_16X16)
+      xd->mi[0].src_mi->mbmi.tx_size = TX_16X16;
   } else {
     xd->mi[0].src_mi->mbmi.tx_size = MIN(max_txsize_lookup[bsize],
@@ -611,7 +615,8 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
       continue;
 
     if (this_mode == NEWMV) {
-      if (this_rd < (int64_t)(1 << num_pels_log2_lookup[bsize]))
+      if (cpi->sf.partition_search_type != VAR_BASED_PARTITION &&
+          this_rd < (int64_t)(1 << num_pels_log2_lookup[bsize]))
         continue;
       if (!combined_motion_search(cpi, x, bsize, mi_row, mi_col,
                                   &frame_mv[NEWMV][ref_frame],
diff --git a/vp9/encoder/vp9_speed_features.c b/vp9/encoder/vp9_speed_features.c
index 062da09a0..e45a07bc5 100644
--- a/vp9/encoder/vp9_speed_features.c
+++ b/vp9/encoder/vp9_speed_features.c
@@ -249,6 +249,7 @@ static void set_rt_speed_feature(VP9_COMP *cpi, SPEED_FEATURES *sf,
     sf->intra_y_mode_mask[TX_32X32] = INTRA_DC;
     sf->frame_parameter_update = 0;
     sf->mv.search_method = FAST_HEX;
+    sf->inter_mode_mask[BLOCK_32X32] = INTER_NEAREST_NEAR_NEW;
     sf->inter_mode_mask[BLOCK_32X64] = INTER_NEAREST;
     sf->inter_mode_mask[BLOCK_64X32] = INTER_NEAREST;
@@ -278,12 +279,17 @@ static void set_rt_speed_feature(VP9_COMP *cpi, SPEED_FEATURES *sf,
       int i;
       // Allow fancy modes at all sizes since SOURCE_VAR_BASED_PARTITION is used
      for (i = 0; i < BLOCK_SIZES; ++i)
-        sf->inter_mode_mask[i] = INTER_ALL;
+        sf->inter_mode_mask[i] = INTER_NEAREST_NEAR_NEW;
     }
 
     // Adaptively switch between SOURCE_VAR_BASED_PARTITION and FIXED_PARTITION.
-    sf->partition_search_type = SOURCE_VAR_BASED_PARTITION;
+    sf->partition_search_type = VAR_BASED_PARTITION;
     sf->search_type_check_frequency = 50;
+    sf->mv.search_method = NSTEP;
+    sf->inter_mode_mask[BLOCK_32X32] = INTER_NEAREST_NEW_ZERO;
+    sf->inter_mode_mask[BLOCK_32X64] = INTER_NEAREST_NEW_ZERO;
+    sf->inter_mode_mask[BLOCK_64X32] = INTER_NEAREST_NEW_ZERO;
+    sf->inter_mode_mask[BLOCK_64X64] = INTER_NEAREST_NEW_ZERO;
 
     sf->tx_size_search_method = is_keyframe ? USE_LARGESTALL : USE_TX_8X8;
@@ -291,7 +297,7 @@ static void set_rt_speed_feature(VP9_COMP *cpi, SPEED_FEATURES *sf,
     sf->reuse_inter_pred_sby = 1;
 
     // Increase mode checking threshold for NEWMV.
-    sf->elevate_newmv_thresh = 2000;
+    sf->elevate_newmv_thresh = 1000;
 
     sf->mv.reduce_first_step_size = 1;
   }
diff --git a/vp9/encoder/vp9_speed_features.h b/vp9/encoder/vp9_speed_features.h
index e71a47b35..cee8ec285 100644
--- a/vp9/encoder/vp9_speed_features.h
+++ b/vp9/encoder/vp9_speed_features.h
@@ -34,6 +34,9 @@ enum {
   INTER_ALL = (1 << NEARESTMV) | (1 << NEARMV) | (1 << ZEROMV) | (1 << NEWMV),
   INTER_NEAREST = (1 << NEARESTMV),
+  INTER_NEAREST_NEW = (1 << NEARESTMV) | (1 << NEWMV),
+  INTER_NEAREST_ZERO = (1 << NEARESTMV) | (1 << ZEROMV),
+  INTER_NEAREST_NEW_ZERO = (1 << NEARESTMV) | (1 << ZEROMV) | (1 << NEWMV),
   INTER_NEAREST_NEAR_NEW = (1 << NEARESTMV) | (1 << NEARMV) | (1 << NEWMV),
   INTER_NEAREST_NEAR_ZERO = (1 << NEARESTMV) | (1 << NEARMV) | (1 << ZEROMV),
 };
diff --git a/vp9/encoder/x86/vp9_avg_intrin_sse2.c b/vp9/encoder/x86/vp9_avg_intrin_sse2.c
new file mode 100644
index 000000000..c6f94dc54
--- /dev/null
+++ b/vp9/encoder/x86/vp9_avg_intrin_sse2.c
@@ -0,0 +1,40 @@
+/*
+ *  Copyright (c) 2014 The WebM project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include <emmintrin.h>
+#include "vpx_ports/mem.h"
+
+
+unsigned int vp9_avg_8x8_sse2(const uint8_t *s, int p) {
+  __m128i s0, s1, u0;
+  unsigned int avg = 0;
+  u0 = _mm_setzero_si128();
+  s0 = _mm_unpacklo_epi8(_mm_loadl_epi64((const __m128i *)(s)), u0);
+  s1 = _mm_unpacklo_epi8(_mm_loadl_epi64((const __m128i *)(s + p)), u0);
+  s0 = _mm_adds_epu16(s0, s1);
+  s1 = _mm_unpacklo_epi8(_mm_loadl_epi64((const __m128i *)(s + 2 * p)), u0);
+  s0 = _mm_adds_epu16(s0, s1);
+  s1 = _mm_unpacklo_epi8(_mm_loadl_epi64((const __m128i *)(s + 3 * p)), u0);
+  s0 = _mm_adds_epu16(s0, s1);
+  s1 = _mm_unpacklo_epi8(_mm_loadl_epi64((const __m128i *)(s + 4 * p)), u0);
+  s0 = _mm_adds_epu16(s0, s1);
+  s1 = _mm_unpacklo_epi8(_mm_loadl_epi64((const __m128i *)(s + 5 * p)), u0);
+  s0 = _mm_adds_epu16(s0, s1);
+  s1 = _mm_unpacklo_epi8(_mm_loadl_epi64((const __m128i *)(s + 6 * p)), u0);
+  s0 = _mm_adds_epu16(s0, s1);
+  s1 = _mm_unpacklo_epi8(_mm_loadl_epi64((const __m128i *)(s + 7 * p)), u0);
+  s0 = _mm_adds_epu16(s0, s1);
+
+  s0 = _mm_adds_epu16(s0, _mm_srli_si128(s0, 8));
+  s0 = _mm_adds_epu16(s0, _mm_srli_epi64(s0, 32));
+  s0 = _mm_adds_epu16(s0, _mm_srli_epi64(s0, 16));
+  avg = _mm_extract_epi16(s0, 0);
+  return (avg + 32) >> 6;
+}
diff --git a/vp9/vp9cx.mk b/vp9/vp9cx.mk
index 869737137..a2e3cda7f 100644
--- a/vp9/vp9cx.mk
+++ b/vp9/vp9cx.mk
@@ -17,6 +17,7 @@ VP9_CX_SRCS_REMOVE-no += $(VP9_COMMON_SRCS_REMOVE-no)
 VP9_CX_SRCS-yes += vp9_cx_iface.c
+VP9_CX_SRCS-yes += encoder/vp9_avg.c
 VP9_CX_SRCS-yes += encoder/vp9_bitstream.c
 VP9_CX_SRCS-yes += encoder/vp9_context_tree.c
 VP9_CX_SRCS-yes += encoder/vp9_context_tree.h
@@ -95,6 +96,7 @@ VP9_CX_SRCS-yes += encoder/vp9_mbgraph.h
 VP9_CX_SRCS-$(HAVE_AVX2) += encoder/x86/vp9_variance_impl_intrin_avx2.c
 VP9_CX_SRCS-$(HAVE_SSE2) += encoder/x86/vp9_sad4d_sse2.asm
+VP9_CX_SRCS-$(HAVE_SSE2) += encoder/x86/vp9_avg_intrin_sse2.c
 VP9_CX_SRCS-$(HAVE_AVX2) += encoder/x86/vp9_sad4d_intrin_avx2.c
 VP9_CX_SRCS-$(HAVE_AVX2) += encoder/x86/vp9_subpel_variance_impl_intrin_avx2.c
 VP9_CX_SRCS-$(HAVE_SSE2) += encoder/x86/vp9_temporal_filter_apply_sse2.asm
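
For context, here is a minimal, self-contained sketch (not part of the patch) of the arithmetic the new code relies on: vp9_avg_8x8_c computes a rounded 8x8 mean, since (sum + 32) >> 6 is sum / 64 rounded to nearest, and choose_partitioning now fills each 8x8 variance-tree node from the difference between the source and last-frame averages instead of calling vp9_get8x8var. The helper name avg_8x8 and the demo buffers below are hypothetical stand-ins.

#include <stdint.h>
#include <stdio.h>

/* Rounded 8x8 mean: (sum + 32) >> 6 equals sum / 64 rounded to nearest,
 * mirroring vp9_avg_8x8_c from the patch. */
static unsigned int avg_8x8(const uint8_t *s, int stride) {
  int i, j;
  int sum = 0;
  for (i = 0; i < 8; ++i)
    for (j = 0; j < 8; ++j)
      sum += s[i * stride + j];
  return (sum + 32) >> 6;
}

int main(void) {
  uint8_t src[64], last[64];
  int i, s_avg, d_avg, sum;
  unsigned int sse;

  /* Hypothetical data: the source 8x8 block is uniformly 3 levels brighter
   * than the co-located block in the last frame. */
  for (i = 0; i < 64; ++i) {
    last[i] = (uint8_t)(100 + (i & 7));
    src[i] = (uint8_t)(last[i] + 3);
  }

  /* Per the diff, choose_partitioning now forms each 8x8 node as
   *   sum = s_avg - d_avg;  sse = sum * sum;
   *   fill_variance(sse, sum, 1, ...);
   * i.e. one "sample" per 8x8 block rather than a true 64-pixel variance. */
  s_avg = (int)avg_8x8(src, 8);
  d_avg = (int)avg_8x8(last, 8);
  sum = s_avg - d_avg;
  sse = (unsigned int)(sum * sum);

  printf("s_avg=%d d_avg=%d sum=%d sse=%u\n", s_avg, d_avg, sum, sse);
  return 0;
}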