experimental: partition using 1/8 x 1/8 image

The concept:

There's too much noise in the source pixels for variance, and at low
bitrates the reconstruction looks nothing like the source, so we have
trouble getting good partitionings with either.  This skirts the issue
by using a box-blurred, scaled-down (1/8 x 1/8) version for the
variance calculations.  To allow comparison against source_var,
keyframes are moved to RD-based partitioning, as source_var already does.

Change-Id: Ie3babdbfadae324b7b5a76bea192893af27f0624
Jim Bankoski 2014-10-07 16:36:14 -07:00
parent dae97868da
commit 0ce51d823f
10 changed files with 255 additions and 17 deletions
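
In effect the partitioner now operates on a 1/8 x 1/8 image built from
per-8x8-block averages of the source and the reconstruction, rather than on
the raw pixels.  A minimal sketch of the idea follows; build_avg_image_64x64
is a hypothetical helper for illustration only, while the commit itself calls
vp9_avg_8x8() per block directly inside choose_partitioning() (see the
vp9_encodeframe.c hunk further down).

#include <stdint.h>

/* Implemented in vp9/encoder/vp9_avg.c below. */
unsigned int vp9_avg_8x8_c(const uint8_t *s, int p);

/* Sketch (not part of the commit): collapse one 64x64 superblock into an
 * 8x8 grid of block averages, i.e. one pixel of the 1/8 x 1/8 image per
 * source 8x8 block.  The partitioner compares these averages for source
 * and reconstruction instead of the noisy raw pixels. */
static void build_avg_image_64x64(const uint8_t *src, int stride,
                                  uint8_t avg_out[8][8]) {
  int r, c;
  for (r = 0; r < 8; ++r)
    for (c = 0; c < 8; ++c)
      avg_out[r][c] =
          (uint8_t)vp9_avg_8x8_c(src + 8 * r * stride + 8 * c, stride);
}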


@@ -129,6 +129,7 @@ LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += fdct8x8_test.cc
LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += variance_test.cc
LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += vp9_subtract_test.cc
LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += lpf_8_test.cc
LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += vp9_avg_test.cc
LIBVPX_TEST_SRCS-$(CONFIG_VP9) += vp9_intrapred_test.cc
ifeq ($(CONFIG_VP9_ENCODER),yes)

test/vp9_avg_test.cc (new file, 150 lines)

@@ -0,0 +1,150 @@
/*
* Copyright (c) 2012 The WebM project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include <string.h>
#include <limits.h>
#include <stdio.h>
#include "./vpx_config.h"
#if CONFIG_VP9_ENCODER
#include "./vp9_rtcd.h"
#endif
#include "vpx_mem/vpx_mem.h"
#include "test/acm_random.h"
#include "test/clear_system_state.h"
#include "test/register_state_check.h"
#include "test/util.h"
#include "third_party/googletest/src/include/gtest/gtest.h"
using libvpx_test::ACMRandom;
namespace {
class AverageTestBase : public ::testing::Test {
public:
AverageTestBase(int width, int height) : width_(width), height_(height) {}
static void SetUpTestCase() {
source_data_ = reinterpret_cast<uint8_t*>(
vpx_memalign(kDataAlignment, kDataBlockSize));
}
static void TearDownTestCase() {
vpx_free(source_data_);
source_data_ = NULL;
}
virtual void TearDown() {
libvpx_test::ClearSystemState();
}
protected:
// Handle up to 4 64x64 blocks, with stride up to 128.
static const int kDataAlignment = 16;
static const int kDataBlockSize = 64 * 128;
virtual void SetUp() {
source_stride_ = (width_ + 31) & ~31;
rnd_.Reset(ACMRandom::DeterministicSeed());
}
// Returns the rounded average of the 8x8 block at |source|.
unsigned int ReferenceAverage(const uint8_t* source, int pitch) {
unsigned int average = 0;
for (int h = 0; h < 8; ++h)
for (int w = 0; w < 8; ++w)
average += source[h * pitch + w];
return ((average + 32) >> 6);
}
void FillConstant(uint8_t fill_constant) {
for (int i = 0; i < width_ * height_; ++i) {
source_data_[i] = fill_constant;
}
}
void FillRandom() {
for (int i = 0; i < width_ * height_; ++i) {
source_data_[i] = rnd_.Rand8();
}
}
int width_, height_;
static uint8_t* source_data_;
int source_stride_;
ACMRandom rnd_;
};
typedef unsigned int (*AverageFunction)(const uint8_t* s, int pitch);
typedef std::tr1::tuple<int, int, int, AverageFunction> AvgFunc;
class AverageTest
: public AverageTestBase,
public ::testing::WithParamInterface<AvgFunc>{
public:
AverageTest() : AverageTestBase(GET_PARAM(0), GET_PARAM(1)) {}
protected:
void CheckAverages() {
unsigned int expected = ReferenceAverage(source_data_+ GET_PARAM(2),
source_stride_);
ASM_REGISTER_STATE_CHECK(GET_PARAM(3)(source_data_+ GET_PARAM(2),
source_stride_));
unsigned int actual = GET_PARAM(3)(source_data_+ GET_PARAM(2),
source_stride_);
EXPECT_EQ(expected, actual);
}
};
uint8_t* AverageTestBase::source_data_ = NULL;
TEST_P(AverageTest, MinValue) {
FillConstant(0);
CheckAverages();
}
TEST_P(AverageTest, MaxValue) {
FillConstant(255);
CheckAverages();
}
TEST_P(AverageTest, Random) {
// The reference frame, but not the source frame, may be unaligned for
// certain types of searches.
for (int i = 0; i < 1000; i++) {
FillRandom();
CheckAverages();
}
}
using std::tr1::make_tuple;
INSTANTIATE_TEST_CASE_P(
C, AverageTest,
::testing::Values(
make_tuple(16, 16, 1, &vp9_avg_8x8_c)));
#if HAVE_SSE2
INSTANTIATE_TEST_CASE_P(
SSE2, AverageTest,
::testing::Values(
make_tuple(16, 16, 0, &vp9_avg_8x8_sse2),
make_tuple(16, 16, 5, &vp9_avg_8x8_sse2),
make_tuple(32, 32, 15, &vp9_avg_8x8_sse2)));
#endif
} // namespace
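
The test tuple is (width, height, byte offset into the source buffer,
function under test).  The C instantiation uses an offset of 1 and the SSE2
instantiation uses 0, 5 and 15, so the 8x8 block start is exercised at both
aligned and unaligned addresses; _mm_loadl_epi64() has no alignment
requirement, and the test would catch an implementation that assumed one.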


@@ -1110,6 +1110,10 @@ specialize qw/vp9_mse8x8/, "$sse2_x86inc";
add_proto qw/unsigned int vp9_get_mb_ss/, "const int16_t *";
specialize qw/vp9_get_mb_ss/, "$sse2_x86inc";
add_proto qw/unsigned int vp9_avg_8x8/, "const uint8_t *, int p";
specialize qw/vp9_avg_8x8/, "$sse2_x86inc";
# ENCODEMB INVOKE
add_proto qw/void vp9_subtract_block/, "int rows, int cols, int16_t *diff_ptr, ptrdiff_t diff_stride, const uint8_t *src_ptr, ptrdiff_t src_stride, const uint8_t *pred_ptr, ptrdiff_t pred_stride";

vp9/encoder/vp9_avg.c (new file, 19 lines)

@@ -0,0 +1,19 @@
/*
* Copyright (c) 2014 The WebM project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "vpx_ports/mem.h"
unsigned int vp9_avg_8x8_c(const uint8_t *s, int p) {
int i, j;
int sum = 0;
for (i = 0; i < 8; ++i, s+=p)
for (j = 0; j < 8; sum += s[j], ++j) {}
return (sum + 32) >> 6;
}
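
The (sum + 32) >> 6 is round-to-nearest division by 64.  A standalone sketch
of the rounding behaviour (not part of the commit):

#include <assert.h>
#include <stdint.h>
#include <string.h>

unsigned int vp9_avg_8x8_c(const uint8_t *s, int p);  /* from vp9_avg.c above */

/* Sketch: (sum + 32) >> 6 rounds halves up, and a saturated block still
 * maps back to 255. */
static void check_avg_rounding(void) {
  uint8_t blk[8 * 8];
  memset(blk, 255, sizeof(blk));         /* sum = 64 * 255 = 16320 */
  assert(vp9_avg_8x8_c(blk, 8) == 255);  /* (16320 + 32) >> 6 = 255 */
  memset(blk, 0, sizeof(blk));
  blk[0] = 96;                           /* sum = 96, exact mean = 1.5 */
  assert(vp9_avg_8x8_c(blk, 8) == 2);    /* rounds up to 2 */
}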


@@ -396,10 +396,10 @@ static int set_vt_partitioning(VP9_COMP *cpi,
const int block_width = num_8x8_blocks_wide_lookup[bsize];
const int block_height = num_8x8_blocks_high_lookup[bsize];
// TODO(debargha): Choose this more intelligently.
const int64_t threshold_multiplier = 25;
int64_t threshold = threshold_multiplier * cpi->common.base_qindex;
const int64_t threshold_multiplier = cm->frame_type == KEY_FRAME ? 64 : 4;
int64_t threshold = threshold_multiplier *
vp9_convert_qindex_to_q(cm->base_qindex, cm->bit_depth);
assert(block_height == block_width);
tree_to_node(data, bsize, &vt);
// Split none is available only if we have more than half a block size
@@ -511,10 +511,17 @@ static void choose_partitioning(VP9_COMP *cpi,
int y_idx = y16_idx + ((k >> 1) << 3);
unsigned int sse = 0;
int sum = 0;
if (x_idx < pixels_wide && y_idx < pixels_high)
vp9_get8x8var(s + y_idx * sp + x_idx, sp,
d + y_idx * dp + x_idx, dp, &sse, &sum);
fill_variance(sse, sum, 64, &vst->split[k].part_variances.none);
if (x_idx < pixels_wide && y_idx < pixels_high) {
int s_avg = vp9_avg_8x8(s + y_idx * sp + x_idx, sp);
int d_avg = vp9_avg_8x8(d + y_idx * dp + x_idx, dp);
sum = s_avg - d_avg;
sse = sum * sum;
}
// For an 8x8 block we have just one value, the average of all 64
// pixels, so use 1. This means of course that there is no variance
// in an 8x8 block.
fill_variance(sse, sum, 1, &vst->split[k].part_variances.none);
}
}
}
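
With sum = s_avg - d_avg, sse = sum * sum and a count of 1, each 8x8 leaf has
zero variance by itself; variance only appears one level up, as the spread of
the four child average-differences, i.e. the variance of the 1/8 x 1/8
difference image.  A sketch of the merge arithmetic, assuming the usual
(sse - sum*sum/count)/count form used by the existing variance tree (the real
fill_variance() may carry an extra fixed-point scale factor):

#include <stdint.h>

/* Sketch (not part of the commit): leaves hold sum = s_avg - d_avg,
 * sse = sum * sum, count = 1, so variance_of() is 0 for a leaf and only
 * becomes non-zero after merging children with different averages. */
typedef struct {
  int64_t sse, sum;
  int count;
} var_sketch;

static var_sketch merge4(const var_sketch v[4]) {
  var_sketch r = { 0, 0, 0 };
  int i;
  for (i = 0; i < 4; ++i) {
    r.sse += v[i].sse;
    r.sum += v[i].sum;
    r.count += v[i].count;
  }
  return r;
}

static int64_t variance_of(const var_sketch *v) {
  return (v->sse - v->sum * v->sum / v->count) / v->count;
}
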
@@ -530,8 +537,8 @@ static void choose_partitioning(VP9_COMP *cpi,
// Now go through the entire structure, splitting every block size until
// we get to one that's got a variance lower than our threshold, or we
// hit 8x8.
if (!set_vt_partitioning(cpi, &vt, BLOCK_64X64,
mi_row, mi_col)) {
if ( mi_col + 8 > cm->mi_cols || mi_row + 8 > cm->mi_rows ||
!set_vt_partitioning(cpi, &vt, BLOCK_64X64, mi_row, mi_col)) {
for (i = 0; i < 4; ++i) {
const int x32_idx = ((i & 1) << 2);
const int y32_idx = ((i >> 1) << 2);
@@ -561,10 +568,10 @@ static void choose_partitioning(VP9_COMP *cpi,
}
}
#else
if (!set_vt_partitioning(cpi, &vt.split[i].split[j], tile,
if (!set_vt_partitioning(cpi, &vt.split[i].split[j],
BLOCK_16X16,
(mi_row + y32_idx + y16_idx),
(mi_col + x32_idx + x16_idx), 2)) {
mi_row + y32_idx + y16_idx,
mi_col + x32_idx + x16_idx)) {
for (k = 0; k < 4; ++k) {
const int x8_idx = (k & 1);
const int y8_idx = (k >> 1);
@@ -2593,7 +2600,8 @@ static void encode_rd_sb_row(VP9_COMP *cpi, const TileInfo *const tile,
set_fixed_partitioning(cpi, tile, mi, mi_row, mi_col, bsize);
rd_use_partition(cpi, tile, mi, tp, mi_row, mi_col, BLOCK_64X64,
&dummy_rate, &dummy_dist, 1, cpi->pc_root);
} else if (sf->partition_search_type == VAR_BASED_PARTITION) {
} else if (sf->partition_search_type == VAR_BASED_PARTITION &&
cm->frame_type != KEY_FRAME ) {
choose_partitioning(cpi, tile, mi_row, mi_col);
rd_use_partition(cpi, tile, mi, tp, mi_row, mi_col, BLOCK_64X64,
&dummy_rate, &dummy_dist, 1, cpi->pc_root);


@@ -235,6 +235,10 @@ static void model_rd_for_sb_y(VP9_COMP *cpi, BLOCK_SIZE bsize,
tx_mode_to_biggest_tx_size[cpi->common.tx_mode]);
else
xd->mi[0].src_mi->mbmi.tx_size = TX_8X8;
if (cpi->sf.partition_search_type == VAR_BASED_PARTITION &&
xd->mi[0].src_mi->mbmi.tx_size > TX_16X16)
xd->mi[0].src_mi->mbmi.tx_size = TX_16X16;
} else {
xd->mi[0].src_mi->mbmi.tx_size =
MIN(max_txsize_lookup[bsize],
@@ -611,7 +615,8 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
continue;
if (this_mode == NEWMV) {
if (this_rd < (int64_t)(1 << num_pels_log2_lookup[bsize]))
if (cpi->sf.partition_search_type != VAR_BASED_PARTITION &&
this_rd < (int64_t)(1 << num_pels_log2_lookup[bsize]))
continue;
if (!combined_motion_search(cpi, x, bsize, mi_row, mi_col,
&frame_mv[NEWMV][ref_frame],


@@ -249,6 +249,7 @@ static void set_rt_speed_feature(VP9_COMP *cpi, SPEED_FEATURES *sf,
sf->intra_y_mode_mask[TX_32X32] = INTRA_DC;
sf->frame_parameter_update = 0;
sf->mv.search_method = FAST_HEX;
sf->inter_mode_mask[BLOCK_32X32] = INTER_NEAREST_NEAR_NEW;
sf->inter_mode_mask[BLOCK_32X64] = INTER_NEAREST;
sf->inter_mode_mask[BLOCK_64X32] = INTER_NEAREST;
@@ -278,12 +279,17 @@ static void set_rt_speed_feature(VP9_COMP *cpi, SPEED_FEATURES *sf,
int i;
// Allow fancy modes at all sizes since SOURCE_VAR_BASED_PARTITION is used
for (i = 0; i < BLOCK_SIZES; ++i)
sf->inter_mode_mask[i] = INTER_ALL;
sf->inter_mode_mask[i] = INTER_NEAREST_NEAR_NEW;
}
// Adaptively switch between SOURCE_VAR_BASED_PARTITION and FIXED_PARTITION.
sf->partition_search_type = SOURCE_VAR_BASED_PARTITION;
sf->partition_search_type = VAR_BASED_PARTITION;
sf->search_type_check_frequency = 50;
sf->mv.search_method = NSTEP;
sf->inter_mode_mask[BLOCK_32X32] = INTER_NEAREST_NEW_ZERO;
sf->inter_mode_mask[BLOCK_32X64] = INTER_NEAREST_NEW_ZERO;
sf->inter_mode_mask[BLOCK_64X32] = INTER_NEAREST_NEW_ZERO;
sf->inter_mode_mask[BLOCK_64X64] = INTER_NEAREST_NEW_ZERO;
sf->tx_size_search_method = is_keyframe ? USE_LARGESTALL : USE_TX_8X8;
@@ -291,7 +297,7 @@ static void set_rt_speed_feature(VP9_COMP *cpi, SPEED_FEATURES *sf,
sf->reuse_inter_pred_sby = 1;
// Increase mode checking threshold for NEWMV.
sf->elevate_newmv_thresh = 2000;
sf->elevate_newmv_thresh = 1000;
sf->mv.reduce_first_step_size = 1;
}


@@ -34,6 +34,9 @@ enum {
enum {
INTER_ALL = (1 << NEARESTMV) | (1 << NEARMV) | (1 << ZEROMV) | (1 << NEWMV),
INTER_NEAREST = (1 << NEARESTMV),
INTER_NEAREST_NEW = (1 << NEARESTMV) | (1 << NEWMV),
INTER_NEAREST_ZERO = (1 << NEARESTMV) | (1 << ZEROMV),
INTER_NEAREST_NEW_ZERO = (1 << NEARESTMV) | (1 << ZEROMV) | (1 << NEWMV),
INTER_NEAREST_NEAR_NEW = (1 << NEARESTMV) | (1 << NEARMV) | (1 << NEWMV),
INTER_NEAREST_NEAR_ZERO = (1 << NEARESTMV) | (1 << NEARMV) | (1 << ZEROMV),
};
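
Each mask is a per-block-size set of allowed inter modes; INTER_NEAREST_NEW_ZERO,
for example, keeps NEARESTMV, ZEROMV and NEWMV but drops NEARMV.  The RTC mode
loop skips any mode whose bit is clear, roughly like the hypothetical helper
below (a paraphrase, not the literal source):

/* Sketch (paraphrase, not the literal source): how an inter_mode_mask
 * entry gates the mode loop.  Assumes the usual encoder headers
 * (SPEED_FEATURES, BLOCK_SIZE, PREDICTION_MODE) are already included. */
static int mode_allowed_sketch(const SPEED_FEATURES *sf, BLOCK_SIZE bsize,
                               PREDICTION_MODE mode) {
  return (sf->inter_mode_mask[bsize] & (1 << mode)) != 0;
}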


@@ -0,0 +1,40 @@
/*
* Copyright (c) 2014 The WebM project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include <immintrin.h>
#include "vpx_ports/mem.h"
unsigned int vp9_avg_8x8_sse2(const uint8_t *s, int p) {
__m128i s0, s1, u0;
unsigned int avg = 0;
u0 = _mm_setzero_si128();
s0 = _mm_unpacklo_epi8(_mm_loadl_epi64((const __m128i *)(s)), u0);
s1 = _mm_unpacklo_epi8(_mm_loadl_epi64((const __m128i *)(s + p)), u0);
s0 = _mm_adds_epu16(s0, s1);
s1 = _mm_unpacklo_epi8(_mm_loadl_epi64((const __m128i *)(s + 2 * p)), u0);
s0 = _mm_adds_epu16(s0, s1);
s1 = _mm_unpacklo_epi8(_mm_loadl_epi64((const __m128i *)(s + 3 * p)), u0);
s0 = _mm_adds_epu16(s0, s1);
s1 = _mm_unpacklo_epi8(_mm_loadl_epi64((const __m128i *)(s + 4 * p)), u0);
s0 = _mm_adds_epu16(s0, s1);
s1 = _mm_unpacklo_epi8(_mm_loadl_epi64((const __m128i *)(s + 5 * p)), u0);
s0 = _mm_adds_epu16(s0, s1);
s1 = _mm_unpacklo_epi8(_mm_loadl_epi64((const __m128i *)(s + 6 * p)), u0);
s0 = _mm_adds_epu16(s0, s1);
s1 = _mm_unpacklo_epi8(_mm_loadl_epi64((const __m128i *)(s + 7 * p)), u0);
s0 = _mm_adds_epu16(s0, s1);
s0 = _mm_adds_epu16(s0, _mm_srli_si128(s0, 8));
s0 = _mm_adds_epu16(s0, _mm_srli_epi64(s0, 32));
s0 = _mm_adds_epu16(s0, _mm_srli_epi64(s0, 16));
avg = _mm_extract_epi16(s0, 0);
return (avg + 32) >> 6;
}
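
Each of the eight 16-bit lanes accumulates one column over the eight rows
(at most 8 * 255 = 2040), and the three shift/add steps fold the lanes down
to a single total of at most 64 * 255 = 16320, so the saturating adds never
actually saturate.  A scalar sketch of that final fold (not part of the
commit):

#include <stdint.h>

/* Sketch: the horizontal reduction performed by the srli/adds steps above,
 * written out in scalar form.  lane[0..7] are the per-column 16-bit sums
 * after the eight row additions. */
static unsigned int fold_lanes_sketch(const uint16_t lane[8]) {
  unsigned int sum = 0;
  int i;
  for (i = 0; i < 8; ++i)
    sum += lane[i];        /* at most 8 * 2040 = 16320, no overflow */
  return (sum + 32) >> 6;  /* same rounding as vp9_avg_8x8_c() */
}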


@@ -17,6 +17,7 @@ VP9_CX_SRCS_REMOVE-no += $(VP9_COMMON_SRCS_REMOVE-no)
VP9_CX_SRCS-yes += vp9_cx_iface.c
VP9_CX_SRCS-yes += encoder/vp9_avg.c
VP9_CX_SRCS-yes += encoder/vp9_bitstream.c
VP9_CX_SRCS-yes += encoder/vp9_context_tree.c
VP9_CX_SRCS-yes += encoder/vp9_context_tree.h
@@ -95,6 +96,7 @@ VP9_CX_SRCS-yes += encoder/vp9_mbgraph.h
VP9_CX_SRCS-$(HAVE_AVX2) += encoder/x86/vp9_variance_impl_intrin_avx2.c
VP9_CX_SRCS-$(HAVE_SSE2) += encoder/x86/vp9_sad4d_sse2.asm
VP9_CX_SRCS-$(HAVE_SSE2) += encoder/x86/vp9_avg_intrin_sse2.c
VP9_CX_SRCS-$(HAVE_AVX2) += encoder/x86/vp9_sad4d_intrin_avx2.c
VP9_CX_SRCS-$(HAVE_AVX2) += encoder/x86/vp9_subpel_variance_impl_intrin_avx2.c
VP9_CX_SRCS-$(HAVE_SSE2) += encoder/x86/vp9_temporal_filter_apply_sse2.asm