From 0ce51d823fcef0f88d213958b43d713ce7bdf189 Mon Sep 17 00:00:00 2001 From: Jim Bankoski Date: Tue, 7 Oct 2014 16:36:14 -0700 Subject: [PATCH] experimental: partition using 1/8 x 1/8 image The concept: There's too much noise in the source pixels for variance, and at low bitrate the reconstructed frame looks nothing like the source, so we have problems getting good partitionings with either. This skirts the issue by using a box-blurred, scaled-down version of the image for the variance calculations. To compare against source_var, keyframe partitioning was moved to be rd based, as it is for source_var. Change-Id: Ie3babdbfadae324b7b5a76bea192893af27f0624 --- test/test.mk | 1 + test/vp9_avg_test.cc | 150 ++++++++++++++++++++++++++ vp9/common/vp9_rtcd_defs.pl | 4 + vp9/encoder/vp9_avg.c | 19 ++++ vp9/encoder/vp9_encodeframe.c | 34 +++--- vp9/encoder/vp9_pickmode.c | 7 +- vp9/encoder/vp9_speed_features.c | 12 ++- vp9/encoder/vp9_speed_features.h | 3 + vp9/encoder/x86/vp9_avg_intrin_sse2.c | 40 +++++++ vp9/vp9cx.mk | 2 + 10 files changed, 255 insertions(+), 17 deletions(-) create mode 100644 test/vp9_avg_test.cc create mode 100644 vp9/encoder/vp9_avg.c create mode 100644 vp9/encoder/x86/vp9_avg_intrin_sse2.c diff --git a/test/test.mk b/test/test.mk index bdde5064a..5228614ae 100644 --- a/test/test.mk +++ b/test/test.mk @@ -129,6 +129,7 @@ LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += fdct8x8_test.cc LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += variance_test.cc LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += vp9_subtract_test.cc LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += lpf_8_test.cc +LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += vp9_avg_test.cc LIBVPX_TEST_SRCS-$(CONFIG_VP9) += vp9_intrapred_test.cc ifeq ($(CONFIG_VP9_ENCODER),yes) diff --git a/test/vp9_avg_test.cc b/test/vp9_avg_test.cc new file mode 100644 index 000000000..c2e472b5d --- /dev/null +++ b/test/vp9_avg_test.cc @@ -0,0 +1,150 @@ +/* + * Copyright (c) 2012 The WebM project authors. All Rights Reserved. 
+ * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + + +#include +#include +#include + +#include "./vpx_config.h" +#if CONFIG_VP9_ENCODER +#include "./vp9_rtcd.h" +#endif +#include "vpx_mem/vpx_mem.h" + +#include "test/acm_random.h" +#include "test/clear_system_state.h" +#include "test/register_state_check.h" +#include "test/util.h" +#include "third_party/googletest/src/include/gtest/gtest.h" + +using libvpx_test::ACMRandom; + +namespace { +class AverageTestBase : public ::testing::Test { + public: + AverageTestBase(int width, int height) : width_(width), height_(height) {} + + static void SetUpTestCase() { + source_data_ = reinterpret_cast( + vpx_memalign(kDataAlignment, kDataBlockSize)); + } + + static void TearDownTestCase() { + vpx_free(source_data_); + source_data_ = NULL; + } + + virtual void TearDown() { + libvpx_test::ClearSystemState(); + } + + protected: + // Handle blocks up to 4 blocks 64x64 with stride up to 128 + static const int kDataAlignment = 16; + static const int kDataBlockSize = 64 * 128; + + virtual void SetUp() { + source_stride_ = (width_ + 31) & ~31; + rnd_.Reset(ACMRandom::DeterministicSeed()); + } + + // Sum Pixels + unsigned int ReferenceAverage(const uint8_t* source, int pitch ) { + unsigned int average = 0; + for (int h = 0; h < 8; ++h) + for (int w = 0; w < 8; ++w) + average += source[h * source_stride_ + w]; + return ((average + 32) >> 6); + } + + void FillConstant(uint8_t fill_constant) { + for (int i = 0; i < width_ * height_; ++i) { + source_data_[i] = fill_constant; + } + } + + void FillRandom() { + for (int i = 0; i < width_ * height_; ++i) { + source_data_[i] = rnd_.Rand8(); + } + } + + int width_, height_; + static uint8_t* source_data_; + int 
source_stride_; + + ACMRandom rnd_; +}; +typedef unsigned int (*AverageFunction)(const uint8_t* s, int pitch); + +typedef std::tr1::tuple AvgFunc; + +class AverageTest + : public AverageTestBase, + public ::testing::WithParamInterface{ + public: + AverageTest() : AverageTestBase(GET_PARAM(0), GET_PARAM(1)) {} + + protected: + void CheckAverages() { + unsigned int expected = ReferenceAverage(source_data_+ GET_PARAM(2), + source_stride_); + + ASM_REGISTER_STATE_CHECK(GET_PARAM(3)(source_data_+ GET_PARAM(2), + source_stride_)); + unsigned int actual = GET_PARAM(3)(source_data_+ GET_PARAM(2), + source_stride_); + + EXPECT_EQ(expected, actual); + } +}; + + +uint8_t* AverageTestBase::source_data_ = NULL; + +TEST_P(AverageTest, MinValue) { + FillConstant(0); + CheckAverages(); +} + +TEST_P(AverageTest, MaxValue) { + FillConstant(255); + CheckAverages(); +} + +TEST_P(AverageTest, Random) { + // The reference frame, but not the source frame, may be unaligned for + // certain types of searches. + for (int i = 0; i < 1000; i++) { + FillRandom(); + CheckAverages(); + } +} + +using std::tr1::make_tuple; + +INSTANTIATE_TEST_CASE_P( + C, AverageTest, + ::testing::Values( + make_tuple(16, 16, 1, &vp9_avg_8x8_c))); + + +#if HAVE_SSE2 +INSTANTIATE_TEST_CASE_P( + SSE2, AverageTest, + ::testing::Values( + make_tuple(16, 16, 0, &vp9_avg_8x8_sse2), + make_tuple(16, 16, 5, &vp9_avg_8x8_sse2), + make_tuple(32, 32, 15, &vp9_avg_8x8_sse2))); + +#endif + +} // namespace diff --git a/vp9/common/vp9_rtcd_defs.pl b/vp9/common/vp9_rtcd_defs.pl index 27ccf03e7..e3f2cf8e6 100644 --- a/vp9/common/vp9_rtcd_defs.pl +++ b/vp9/common/vp9_rtcd_defs.pl @@ -1110,6 +1110,10 @@ specialize qw/vp9_mse8x8/, "$sse2_x86inc"; add_proto qw/unsigned int vp9_get_mb_ss/, "const int16_t *"; specialize qw/vp9_get_mb_ss/, "$sse2_x86inc"; + +add_proto qw/unsigned int vp9_avg_8x8/, "const uint8_t *, int p"; +specialize qw/vp9_avg_8x8/, "$sse2_x86inc"; + # ENCODEMB INVOKE add_proto qw/void vp9_subtract_block/, "int rows, 
/* Returns the rounded average of an 8x8 block of 8-bit pixels.
 *
 * s: pointer to the top-left pixel of the block.
 * p: distance, in bytes, between the starts of consecutive rows.
 *
 * The 64 pixel values are summed and divided by 64 with round-to-nearest
 * (half the divisor is added before the shift).
 */
unsigned int vp9_avg_8x8_c(const uint8_t *s, int p) {
  /* At most 64 * 255 = 16320, so an unsigned accumulator cannot overflow and
   * matches the return type without a signed/unsigned conversion. */
  unsigned int sum = 0;
  int row, col;

  for (row = 0; row < 8; ++row, s += p) {
    for (col = 0; col < 8; ++col) {
      sum += s[col];
    }
  }

  return (sum + 32) >> 6;
}
64 : 4; + int64_t threshold = threshold_multiplier * + vp9_convert_qindex_to_q(cm->base_qindex, cm->bit_depth); assert(block_height == block_width); - tree_to_node(data, bsize, &vt); // Split none is available only if we have more than half a block size @@ -511,10 +511,17 @@ static void choose_partitioning(VP9_COMP *cpi, int y_idx = y16_idx + ((k >> 1) << 3); unsigned int sse = 0; int sum = 0; - if (x_idx < pixels_wide && y_idx < pixels_high) - vp9_get8x8var(s + y_idx * sp + x_idx, sp, - d + y_idx * dp + x_idx, dp, &sse, &sum); - fill_variance(sse, sum, 64, &vst->split[k].part_variances.none); + + if (x_idx < pixels_wide && y_idx < pixels_high) { + int s_avg = vp9_avg_8x8(s + y_idx * sp + x_idx, sp); + int d_avg = vp9_avg_8x8(d + y_idx * dp + x_idx, dp); + sum = s_avg - d_avg; + sse = sum * sum; + } + // For an 8x8 block we have just one value the average of all 64 + // pixels, so use 1. This means of course that there is no variance + // in an 8x8 block. + fill_variance(sse, sum, 1, &vst->split[k].part_variances.none); } } } @@ -530,8 +537,8 @@ static void choose_partitioning(VP9_COMP *cpi, // Now go through the entire structure, splitting every block size until // we get to one that's got a variance lower than our threshold, or we // hit 8x8. 
- if (!set_vt_partitioning(cpi, &vt, BLOCK_64X64, - mi_row, mi_col)) { + if ( mi_col + 8 > cm->mi_cols || mi_row + 8 > cm->mi_rows || + !set_vt_partitioning(cpi, &vt, BLOCK_64X64, mi_row, mi_col)) { for (i = 0; i < 4; ++i) { const int x32_idx = ((i & 1) << 2); const int y32_idx = ((i >> 1) << 2); @@ -561,10 +568,10 @@ static void choose_partitioning(VP9_COMP *cpi, } } #else - if (!set_vt_partitioning(cpi, &vt.split[i].split[j], tile, + if (!set_vt_partitioning(cpi, &vt.split[i].split[j], BLOCK_16X16, - (mi_row + y32_idx + y16_idx), - (mi_col + x32_idx + x16_idx), 2)) { + mi_row + y32_idx + y16_idx, + mi_col + x32_idx + x16_idx)) { for (k = 0; k < 4; ++k) { const int x8_idx = (k & 1); const int y8_idx = (k >> 1); @@ -2593,7 +2600,8 @@ static void encode_rd_sb_row(VP9_COMP *cpi, const TileInfo *const tile, set_fixed_partitioning(cpi, tile, mi, mi_row, mi_col, bsize); rd_use_partition(cpi, tile, mi, tp, mi_row, mi_col, BLOCK_64X64, &dummy_rate, &dummy_dist, 1, cpi->pc_root); - } else if (sf->partition_search_type == VAR_BASED_PARTITION) { + } else if (sf->partition_search_type == VAR_BASED_PARTITION && + cm->frame_type != KEY_FRAME ) { choose_partitioning(cpi, tile, mi_row, mi_col); rd_use_partition(cpi, tile, mi, tp, mi_row, mi_col, BLOCK_64X64, &dummy_rate, &dummy_dist, 1, cpi->pc_root); diff --git a/vp9/encoder/vp9_pickmode.c b/vp9/encoder/vp9_pickmode.c index 428767a44..4efa22a89 100644 --- a/vp9/encoder/vp9_pickmode.c +++ b/vp9/encoder/vp9_pickmode.c @@ -235,6 +235,10 @@ static void model_rd_for_sb_y(VP9_COMP *cpi, BLOCK_SIZE bsize, tx_mode_to_biggest_tx_size[cpi->common.tx_mode]); else xd->mi[0].src_mi->mbmi.tx_size = TX_8X8; + + if (cpi->sf.partition_search_type == VAR_BASED_PARTITION && + xd->mi[0].src_mi->mbmi.tx_size > TX_16X16) + xd->mi[0].src_mi->mbmi.tx_size = TX_16X16; } else { xd->mi[0].src_mi->mbmi.tx_size = MIN(max_txsize_lookup[bsize], @@ -611,7 +615,8 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, continue; if (this_mode == NEWMV) { - if 
(this_rd < (int64_t)(1 << num_pels_log2_lookup[bsize])) + if (cpi->sf.partition_search_type != VAR_BASED_PARTITION && + this_rd < (int64_t)(1 << num_pels_log2_lookup[bsize])) continue; if (!combined_motion_search(cpi, x, bsize, mi_row, mi_col, &frame_mv[NEWMV][ref_frame], diff --git a/vp9/encoder/vp9_speed_features.c b/vp9/encoder/vp9_speed_features.c index 062da09a0..e45a07bc5 100644 --- a/vp9/encoder/vp9_speed_features.c +++ b/vp9/encoder/vp9_speed_features.c @@ -249,6 +249,7 @@ static void set_rt_speed_feature(VP9_COMP *cpi, SPEED_FEATURES *sf, sf->intra_y_mode_mask[TX_32X32] = INTRA_DC; sf->frame_parameter_update = 0; sf->mv.search_method = FAST_HEX; + sf->inter_mode_mask[BLOCK_32X32] = INTER_NEAREST_NEAR_NEW; sf->inter_mode_mask[BLOCK_32X64] = INTER_NEAREST; sf->inter_mode_mask[BLOCK_64X32] = INTER_NEAREST; @@ -278,12 +279,17 @@ static void set_rt_speed_feature(VP9_COMP *cpi, SPEED_FEATURES *sf, int i; // Allow fancy modes at all sizes since SOURCE_VAR_BASED_PARTITION is used for (i = 0; i < BLOCK_SIZES; ++i) - sf->inter_mode_mask[i] = INTER_ALL; + sf->inter_mode_mask[i] = INTER_NEAREST_NEAR_NEW; } // Adaptively switch between SOURCE_VAR_BASED_PARTITION and FIXED_PARTITION. - sf->partition_search_type = SOURCE_VAR_BASED_PARTITION; + sf->partition_search_type = VAR_BASED_PARTITION; sf->search_type_check_frequency = 50; + sf->mv.search_method = NSTEP; + sf->inter_mode_mask[BLOCK_32X32] = INTER_NEAREST_NEW_ZERO; + sf->inter_mode_mask[BLOCK_32X64] = INTER_NEAREST_NEW_ZERO; + sf->inter_mode_mask[BLOCK_64X32] = INTER_NEAREST_NEW_ZERO; + sf->inter_mode_mask[BLOCK_64X64] = INTER_NEAREST_NEW_ZERO; sf->tx_size_search_method = is_keyframe ? USE_LARGESTALL : USE_TX_8X8; @@ -291,7 +297,7 @@ static void set_rt_speed_feature(VP9_COMP *cpi, SPEED_FEATURES *sf, sf->reuse_inter_pred_sby = 1; // Increase mode checking threshold for NEWMV. 
- sf->elevate_newmv_thresh = 2000; + sf->elevate_newmv_thresh = 1000; sf->mv.reduce_first_step_size = 1; } diff --git a/vp9/encoder/vp9_speed_features.h b/vp9/encoder/vp9_speed_features.h index e71a47b35..cee8ec285 100644 --- a/vp9/encoder/vp9_speed_features.h +++ b/vp9/encoder/vp9_speed_features.h @@ -34,6 +34,9 @@ enum { enum { INTER_ALL = (1 << NEARESTMV) | (1 << NEARMV) | (1 << ZEROMV) | (1 << NEWMV), INTER_NEAREST = (1 << NEARESTMV), + INTER_NEAREST_NEW = (1 << NEARESTMV) | (1 << NEWMV), + INTER_NEAREST_ZERO = (1 << NEARESTMV) | (1 << ZEROMV), + INTER_NEAREST_NEW_ZERO = (1 << NEARESTMV) | (1 << ZEROMV) | (1 << NEWMV), INTER_NEAREST_NEAR_NEW = (1 << NEARESTMV) | (1 << NEARMV) | (1 << NEWMV), INTER_NEAREST_NEAR_ZERO = (1 << NEARESTMV) | (1 << NEARMV) | (1 << ZEROMV), }; diff --git a/vp9/encoder/x86/vp9_avg_intrin_sse2.c b/vp9/encoder/x86/vp9_avg_intrin_sse2.c new file mode 100644 index 000000000..c6f94dc54 --- /dev/null +++ b/vp9/encoder/x86/vp9_avg_intrin_sse2.c @@ -0,0 +1,40 @@ +/* + * Copyright (c) 2014 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
/* SSE2 version of the 8x8 average: returns the rounded mean of the 8x8 block
 * of 8-bit pixels whose top-left pixel is at s and whose rows are p bytes
 * apart.
 */
unsigned int vp9_avg_8x8_sse2(const uint8_t *s, int p) {
  const __m128i zero = _mm_setzero_si128();
  __m128i acc;
  int row;

  /* Row 0: load 8 pixels and widen them to eight 16-bit lanes. */
  acc = _mm_unpacklo_epi8(_mm_loadl_epi64((const __m128i *)s), zero);

  /* Rows 1..7: widen and accumulate column-wise. Each lane holds at most
   * 8 * 255 = 2040, so the saturating add never actually saturates. */
  for (row = 1; row < 8; ++row) {
    const __m128i px = _mm_loadl_epi64((const __m128i *)(s + row * p));
    acc = _mm_adds_epu16(acc, _mm_unpacklo_epi8(px, zero));
  }

  /* Horizontal reduction of the eight lane sums into lane 0:
   * 8 lanes -> 4 -> 2 -> 1. */
  acc = _mm_adds_epu16(acc, _mm_srli_si128(acc, 8));
  acc = _mm_adds_epu16(acc, _mm_srli_epi64(acc, 32));
  acc = _mm_adds_epu16(acc, _mm_srli_epi64(acc, 16));

  /* Divide the 64-pixel sum by 64 with round-to-nearest. */
  return ((unsigned int)_mm_extract_epi16(acc, 0) + 32) >> 6;
}
encoder/x86/vp9_subpel_variance_impl_intrin_avx2.c VP9_CX_SRCS-$(HAVE_SSE2) += encoder/x86/vp9_temporal_filter_apply_sse2.asm