vpx/test/vp9_block_error_test.cc
Johann 904b957ae9 consolidate block_error functions
vp9_highbd_block_error_8bit_c was a very simple wrapper around
vp9_block_error_c. The SSE2 implementation was practically identical to
the non-HBD one. It was missing some minor improvements which only
went into the original version.

In quick speed tests, the AVX implementation showed minimal
improvement over SSE2 when it does not detect overflow. However, when
overflow is detected the function is run a second time. The
OperationCheck test seems to trigger this case and reverses any
speed benefits by running ~60% slower. AVX2 on the other hand is
always 30-40% faster.

Change-Id: I9fcb9afbcb560f234c7ae1b13ddb69eca3988ba1
2017-02-24 05:25:26 +00:00

199 lines
6.4 KiB
C++

/*
* Copyright (c) 2014 The WebM project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include <cmath>
#include <cstdlib>
#include <string>
#include "third_party/googletest/src/include/gtest/gtest.h"
#include "./vpx_config.h"
#include "./vp9_rtcd.h"
#include "test/acm_random.h"
#include "test/clear_system_state.h"
#include "test/register_state_check.h"
#include "test/util.h"
#include "vp9/common/vp9_entropy.h"
#include "vpx/vpx_codec.h"
#include "vpx/vpx_integer.h"
#include "vpx_dsp/vpx_dsp_common.h"
using libvpx_test::ACMRandom;
namespace {
// Number of loop iterations each test in this file runs.
const int kNumIterations = 1000;
// Signature shared by the function under test and its reference when the bit
// depth (bps) is passed explicitly. The tests compare both the return value
// and the value written through |ssz| between the two implementations.
// NOTE(review): presumably the return value is the coeff/dqcoeff squared
// error and |ssz| receives the sum of squared coeffs - confirm against the
// vp9 block error implementations.
typedef int64_t (*HBDBlockErrorFunc)(const tran_low_t *coeff,
const tran_low_t *dqcoeff,
intptr_t block_size, int64_t *ssz,
int bps);
// Test parameter: (function under test, reference function, bit depth), in
// the order the fixture's SetUp() reads them.
typedef std::tr1::tuple<HBDBlockErrorFunc, HBDBlockErrorFunc, vpx_bit_depth_t>
BlockErrorParam;
// Signature of the block error functions that take no bit depth argument.
// These are adapted to HBDBlockErrorFunc through BlockError8BitWrapper.
typedef int64_t (*BlockErrorFunc)(const tran_low_t *coeff,
const tran_low_t *dqcoeff,
intptr_t block_size, int64_t *ssz);
template <BlockErrorFunc fn>
int64_t BlockError8BitWrapper(const tran_low_t *coeff,
const tran_low_t *dqcoeff, intptr_t block_size,
int64_t *ssz, int bps) {
EXPECT_EQ(bps, 8);
return fn(coeff, dqcoeff, block_size, ssz);
}
// Fixture for the block error tests. Each instance is parameterized by a
// BlockErrorParam tuple, which SetUp() unpacks into the members below.
class BlockErrorTest : public ::testing::TestWithParam<BlockErrorParam> {
 public:
  virtual ~BlockErrorTest() {}

  // Reads the test parameter: element 0 is the function under test, element 1
  // the reference function and element 2 the bit depth.
  virtual void SetUp() {
    bit_depth_ = GET_PARAM(2);
    ref_error_block_op_ = GET_PARAM(1);
    error_block_op_ = GET_PARAM(0);
  }

  virtual void TearDown() {
    libvpx_test::ClearSystemState();
  }

 protected:
  vpx_bit_depth_t bit_depth_;             // Bit depth passed to both functions.
  HBDBlockErrorFunc error_block_op_;      // Function under test.
  HBDBlockErrorFunc ref_error_block_op_;  // Reference to compare against.
};
// Runs the function under test and the reference function on the same random
// coefficients for kNumIterations iterations, cycling through nine block
// sizes, and expects both the return value and the ssz output to match on
// every iteration. The index of the first mismatching iteration is reported.
TEST_P(BlockErrorTest, OperationCheck) {
  ACMRandom rnd(ACMRandom::DeterministicSeed());
  DECLARE_ALIGNED(16, tran_low_t, coeff[4096]);
  DECLARE_ALIGNED(16, tran_low_t, dqcoeff[4096]);

  // Outputs of the function under test and of the reference function.
  int64_t tested_ssz;
  int64_t tested_err;
  int64_t expected_ssz;
  int64_t expected_err;

  // Argument handed to rnd() for every coefficient magnitude; it grows with
  // the bit depth.
  const int magnitude_range = 1 << (bit_depth_ + 8 - 1);

  int mismatch_total = 0;
  int first_mismatch = -1;

  for (int iter = 0; iter < kNumIterations; ++iter) {
    // 16 << 0 through 16 << 8: all block sizes from 4x4, 8x4 .. 64x64.
    const intptr_t block_size = 16 << (iter % 9);

    for (int idx = 0; idx < block_size; ++idx) {
      // coeff and dqcoeff will always have at least the same sign, and this
      // can be used for optimization, so draw one sign and apply it to both.
      const int sign = rnd(2) ? 1 : -1;
      coeff[idx] = sign * rnd(magnitude_range);
      dqcoeff[idx] = sign * rnd(magnitude_range);
    }

    expected_err = ref_error_block_op_(coeff, dqcoeff, block_size,
                                       &expected_ssz, bit_depth_);
    ASM_REGISTER_STATE_CHECK(
        tested_err = error_block_op_(coeff, dqcoeff, block_size, &tested_ssz,
                                     bit_depth_));

    const bool mismatch =
        (expected_err != tested_err) || (expected_ssz != tested_ssz);
    if (mismatch) {
      // Remember only the earliest failing iteration.
      if (mismatch_total == 0) {
        first_mismatch = iter;
      }
      ++mismatch_total;
    }
  }

  EXPECT_EQ(0, mismatch_total)
      << "Error: Error Block Test, C output doesn't match optimized output. "
      << "First failed at test case " << first_mismatch;
}
// Compares the function under test against the reference function on blocks
// filled with boundary values. Iterations are grouped in runs of nine (one
// per block size) and each run uses one of nine fill patterns:
//   patterns 0-3: coeff / dqcoeff are each either 0 or +peak,
//   patterns 4-7: coeff / dqcoeff are each either 0 or -peak,
//   pattern  8:   random same-signed values drawn with rnd(1 << 14).
// |peak| starts at (1 << (bit_depth_ + 7)) - 1 and is halved each time
// pattern 8 begins, so that later rounds test different bit boundaries.
TEST_P(BlockErrorTest, ExtremeValues) {
  ACMRandom rnd(ACMRandom::DeterministicSeed());
  DECLARE_ALIGNED(16, tran_low_t, coeff[4096]);
  DECLARE_ALIGNED(16, tran_low_t, dqcoeff[4096]);

  // Outputs of the function under test and of the reference function.
  int64_t tested_ssz;
  int64_t tested_err;
  int64_t expected_ssz;
  int64_t expected_err;

  const int random_range = 1 << 14;
  int peak = (1 << (bit_depth_ + 8 - 1)) - 1;

  int mismatch_total = 0;
  int first_mismatch = -1;

  for (int iter = 0; iter < kNumIterations; ++iter) {
    const int size_index = iter % 9;
    const int pattern = (iter / 9) % 9;

    // Change the maximum coeff value, to test different bit boundaries.
    if (pattern == 8 && size_index == 0) {
      peak >>= 1;
    }

    // 16 << 0 through 16 << 8: all block sizes from 4x4, 8x4 .. 64x64.
    const intptr_t block_size = 16 << size_index;

    if (pattern < 8) {
      // Constant fill: bit 0 of the pattern selects whether coeff is at the
      // extreme, bit 1 does the same for dqcoeff, and patterns 4-7 use the
      // negative extreme instead of the positive one.
      const int extreme = (pattern < 4) ? peak : -peak;
      const int coeff_fill = (pattern & 1) ? extreme : 0;
      const int dqcoeff_fill = (pattern & 2) ? extreme : 0;
      for (int idx = 0; idx < block_size; ++idx) {
        coeff[idx] = coeff_fill;
        dqcoeff[idx] = dqcoeff_fill;
      }
    } else {
      // Random fill with a shared sign for each coeff / dqcoeff pair.
      for (int idx = 0; idx < block_size; ++idx) {
        const int sign = rnd(2) ? 1 : -1;
        coeff[idx] = sign * rnd(random_range);
        dqcoeff[idx] = sign * rnd(random_range);
      }
    }

    expected_err = ref_error_block_op_(coeff, dqcoeff, block_size,
                                       &expected_ssz, bit_depth_);
    ASM_REGISTER_STATE_CHECK(
        tested_err = error_block_op_(coeff, dqcoeff, block_size, &tested_ssz,
                                     bit_depth_));

    const bool mismatch =
        (expected_err != tested_err) || (expected_ssz != tested_ssz);
    if (mismatch) {
      // Remember only the earliest failing iteration.
      if (mismatch_total == 0) {
        first_mismatch = iter;
      }
      ++mismatch_total;
    }
  }

  EXPECT_EQ(0, mismatch_total)
      << "Error: Error Block Test, C output doesn't match optimized output. "
      << "First failed at test case " << first_mismatch;
}
using std::tr1::make_tuple;
// SSE2 instantiations, built only when HAVE_SSE2 is set. Each tuple is
// (function under test, reference function, bit depth).
#if HAVE_SSE2
const BlockErrorParam sse2_block_error_tests[] = {
// The high bit depth SSE2 function is checked against its C counterpart at
// 10, 12 and 8 bits; these entries exist only in high bit depth builds.
#if CONFIG_VP9_HIGHBITDEPTH
make_tuple(&vp9_highbd_block_error_sse2, &vp9_highbd_block_error_c,
VPX_BITS_10),
make_tuple(&vp9_highbd_block_error_sse2, &vp9_highbd_block_error_c,
VPX_BITS_12),
make_tuple(&vp9_highbd_block_error_sse2, &vp9_highbd_block_error_c,
VPX_BITS_8),
#endif // CONFIG_VP9_HIGHBITDEPTH
// The functions without a bit depth argument go through
// BlockError8BitWrapper so that they fit the HBDBlockErrorFunc signature.
make_tuple(&BlockError8BitWrapper<vp9_block_error_sse2>,
&BlockError8BitWrapper<vp9_block_error_c>, VPX_BITS_8)
};
INSTANTIATE_TEST_CASE_P(SSE2, BlockErrorTest,
::testing::ValuesIn(sse2_block_error_tests));
#endif // HAVE_SSE2
// AVX2 instantiation, built only when HAVE_AVX2 is set. It has a single
// parameter: vp9_block_error_avx2 checked against vp9_block_error_c at 8
// bits, both adapted through BlockError8BitWrapper.
#if HAVE_AVX2
INSTANTIATE_TEST_CASE_P(
AVX2, BlockErrorTest,
::testing::Values(make_tuple(&BlockError8BitWrapper<vp9_block_error_avx2>,
&BlockError8BitWrapper<vp9_block_error_c>,
VPX_BITS_8)));
#endif // HAVE_AVX2
} // namespace