vpx/test/partial_idct_test.cc
Johann 7994dba6c0 partial_idct_test: add _add_ test
The result of the transform is added to the destination buffers. In the
existing tests the destination buffer is always empty so that portion of
the code was never exercised.

Change-Id: I1858c4fed2274f1b9faf834d2ba4186a4510492a
2016-10-26 21:35:49 -07:00

339 lines
16 KiB
C++

/*
* Copyright (c) 2013 The WebM project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include <math.h>
#include <stdlib.h>
#include <string.h>
#include "third_party/googletest/src/include/gtest/gtest.h"
#include "./vp9_rtcd.h"
#include "./vpx_dsp_rtcd.h"
#include "test/acm_random.h"
#include "test/clear_system_state.h"
#include "test/register_state_check.h"
#include "test/util.h"
#include "vp9/common/vp9_blockd.h"
#include "vp9/common/vp9_scan.h"
#include "vpx/vpx_integer.h"
using libvpx_test::ACMRandom;
namespace {
typedef void (*FwdTxfmFunc)(const int16_t *in, tran_low_t *out, int stride);
typedef void (*InvTxfmFunc)(const tran_low_t *in, uint8_t *out, int stride);
typedef std::tr1::tuple<FwdTxfmFunc, InvTxfmFunc, InvTxfmFunc, TX_SIZE, int>
PartialInvTxfmParam;
const int kMaxNumCoeffs = 1024;
class PartialIDctTest : public ::testing::TestWithParam<PartialInvTxfmParam> {
public:
virtual ~PartialIDctTest() {}
virtual void SetUp() {
ftxfm_ = GET_PARAM(0);
full_itxfm_ = GET_PARAM(1);
partial_itxfm_ = GET_PARAM(2);
tx_size_ = GET_PARAM(3);
last_nonzero_ = GET_PARAM(4);
switch (tx_size_) {
case TX_4X4: size_ = 4; break;
case TX_8X8: size_ = 8; break;
case TX_16X16: size_ = 16; break;
case TX_32X32: size_ = 32; break;
default: FAIL() << "Wrong Size!"; break;
}
block_size_ = size_ * size_;
input_block_ = reinterpret_cast<tran_low_t *>(
vpx_memalign(16, sizeof(*input_block_) * block_size_));
output_block_ = reinterpret_cast<uint8_t *>(
vpx_memalign(16, sizeof(*output_block_) * block_size_));
output_block_ref_ = reinterpret_cast<uint8_t *>(
vpx_memalign(16, sizeof(*output_block_ref_) * block_size_));
}
virtual void TearDown() {
vpx_free(input_block_);
input_block_ = NULL;
vpx_free(output_block_);
output_block_ = NULL;
vpx_free(output_block_ref_);
output_block_ref_ = NULL;
libvpx_test::ClearSystemState();
}
protected:
int last_nonzero_;
TX_SIZE tx_size_;
tran_low_t *input_block_;
uint8_t *output_block_;
uint8_t *output_block_ref_;
int size_;
int block_size_;
FwdTxfmFunc ftxfm_;
InvTxfmFunc full_itxfm_;
InvTxfmFunc partial_itxfm_;
};
TEST_P(PartialIDctTest, RunQuantCheck) {
ACMRandom rnd(ACMRandom::DeterministicSeed());
const int count_test_block = 1000;
DECLARE_ALIGNED(16, int16_t, input_extreme_block[kMaxNumCoeffs]);
DECLARE_ALIGNED(16, tran_low_t, output_ref_block[kMaxNumCoeffs]);
for (int i = 0; i < count_test_block; ++i) {
// clear out destination buffer
memset(input_block_, 0, sizeof(*input_block_) * block_size_);
memset(output_block_, 0, sizeof(*output_block_) * block_size_);
memset(output_block_ref_, 0, sizeof(*output_block_ref_) * block_size_);
ACMRandom rnd(ACMRandom::DeterministicSeed());
for (int i = 0; i < count_test_block; ++i) {
// Initialize a test block with input range [-255, 255].
if (i == 0) {
for (int j = 0; j < block_size_; ++j) input_extreme_block[j] = 255;
} else if (i == 1) {
for (int j = 0; j < block_size_; ++j) input_extreme_block[j] = -255;
} else {
for (int j = 0; j < block_size_; ++j) {
input_extreme_block[j] = rnd.Rand8() % 2 ? 255 : -255;
}
}
ftxfm_(input_extreme_block, output_ref_block, size_);
// quantization with maximum allowed step sizes
input_block_[0] = (output_ref_block[0] / 1336) * 1336;
for (int j = 1; j < last_nonzero_; ++j) {
input_block_[vp9_default_scan_orders[tx_size_].scan[j]] =
(output_ref_block[j] / 1828) * 1828;
}
}
ASM_REGISTER_STATE_CHECK(
full_itxfm_(input_block_, output_block_ref_, size_));
ASM_REGISTER_STATE_CHECK(
partial_itxfm_(input_block_, output_block_, size_));
ASSERT_EQ(0, memcmp(output_block_ref_, output_block_,
sizeof(*output_block_) * block_size_))
<< "Error: partial inverse transform produces different results";
}
}
TEST_P(PartialIDctTest, ResultsMatch) {
ACMRandom rnd(ACMRandom::DeterministicSeed());
const int count_test_block = 1000;
const int max_coeff = 32766 / 4;
for (int i = 0; i < count_test_block; ++i) {
// clear out destination buffer
memset(input_block_, 0, sizeof(*input_block_) * block_size_);
memset(output_block_, 0, sizeof(*output_block_) * block_size_);
memset(output_block_ref_, 0, sizeof(*output_block_ref_) * block_size_);
int max_energy_leftover = max_coeff * max_coeff;
for (int j = 0; j < last_nonzero_; ++j) {
int16_t coef = static_cast<int16_t>(sqrt(1.0 * max_energy_leftover) *
(rnd.Rand16() - 32768) / 65536);
max_energy_leftover -= coef * coef;
if (max_energy_leftover < 0) {
max_energy_leftover = 0;
coef = 0;
}
input_block_[vp9_default_scan_orders[tx_size_].scan[j]] = coef;
}
ASM_REGISTER_STATE_CHECK(
full_itxfm_(input_block_, output_block_ref_, size_));
ASM_REGISTER_STATE_CHECK(
partial_itxfm_(input_block_, output_block_, size_));
ASSERT_EQ(0, memcmp(output_block_ref_, output_block_,
sizeof(*output_block_) * block_size_))
<< "Error: partial inverse transform produces different results";
}
}
TEST_P(PartialIDctTest, AddOutputBlock) {
ACMRandom rnd(ACMRandom::DeterministicSeed());
const int count_test_block = 10;
for (int i = 0; i < count_test_block; ++i) {
memset(input_block_, 0, sizeof(*input_block_) * block_size_);
for (int j = 0; j < last_nonzero_; ++j) {
input_block_[vp9_default_scan_orders[tx_size_].scan[j]] = 10;
}
for (int j = 0; j < block_size_; ++j) {
output_block_[j] = output_block_ref_[j] = rnd.Rand8();
}
ASM_REGISTER_STATE_CHECK(
full_itxfm_(input_block_, output_block_ref_, size_));
ASM_REGISTER_STATE_CHECK(
partial_itxfm_(input_block_, output_block_, size_));
ASSERT_EQ(0, memcmp(output_block_ref_, output_block_,
sizeof(*output_block_) * block_size_))
<< "Error: Transform results are not correctly added to output.";
}
}
using std::tr1::make_tuple;
INSTANTIATE_TEST_CASE_P(
C, PartialIDctTest,
::testing::Values(make_tuple(&vpx_fdct32x32_c, &vpx_idct32x32_1024_add_c,
&vpx_idct32x32_1024_add_c, TX_32X32, 1024),
make_tuple(&vpx_fdct32x32_c, &vpx_idct32x32_1024_add_c,
&vpx_idct32x32_135_add_c, TX_32X32, 135),
make_tuple(&vpx_fdct32x32_c, &vpx_idct32x32_1024_add_c,
&vpx_idct32x32_34_add_c, TX_32X32, 34),
make_tuple(&vpx_fdct32x32_c, &vpx_idct32x32_1024_add_c,
&vpx_idct32x32_1_add_c, TX_32X32, 1),
make_tuple(&vpx_fdct16x16_c, &vpx_idct16x16_256_add_c,
&vpx_idct16x16_256_add_c, TX_16X16, 256),
make_tuple(&vpx_fdct16x16_c, &vpx_idct16x16_256_add_c,
&vpx_idct16x16_10_add_c, TX_16X16, 10),
make_tuple(&vpx_fdct16x16_c, &vpx_idct16x16_256_add_c,
&vpx_idct16x16_1_add_c, TX_16X16, 1),
make_tuple(&vpx_fdct8x8_c, &vpx_idct8x8_64_add_c,
&vpx_idct8x8_64_add_c, TX_8X8, 64),
make_tuple(&vpx_fdct8x8_c, &vpx_idct8x8_64_add_c,
&vpx_idct8x8_12_add_c, TX_8X8, 12),
make_tuple(&vpx_fdct8x8_c, &vpx_idct8x8_64_add_c,
&vpx_idct8x8_1_add_c, TX_8X8, 1),
make_tuple(&vpx_fdct4x4_c, &vpx_idct4x4_16_add_c,
&vpx_idct4x4_16_add_c, TX_4X4, 16),
make_tuple(&vpx_fdct4x4_c, &vpx_idct4x4_16_add_c,
&vpx_idct4x4_1_add_c, TX_4X4, 1)));
#if HAVE_NEON && !CONFIG_EMULATE_HARDWARE
#if CONFIG_VP9_HIGHBITDEPTH
INSTANTIATE_TEST_CASE_P(
NEON, PartialIDctTest,
::testing::Values(make_tuple(&vpx_fdct32x32_c, &vpx_idct32x32_1024_add_c,
&vpx_idct32x32_1_add_neon, TX_32X32, 1),
make_tuple(&vpx_fdct16x16_c, &vpx_idct16x16_256_add_c,
&vpx_idct16x16_1_add_neon, TX_16X16, 1),
make_tuple(&vpx_fdct8x8_c, &vpx_idct8x8_64_add_c,
&vpx_idct8x8_1_add_neon, TX_8X8, 1),
make_tuple(&vpx_fdct4x4_c, &vpx_idct4x4_16_add_c,
&vpx_idct4x4_1_add_neon, TX_4X4, 1)));
#else // !CONFIG_VP9_HIGHBITDEPTH
// 32x32_135_ is implemented using the 1024 version.
INSTANTIATE_TEST_CASE_P(
NEON, PartialIDctTest,
::testing::Values(make_tuple(&vpx_fdct32x32_c, &vpx_idct32x32_1024_add_c,
&vpx_idct32x32_1024_add_neon, TX_32X32, 1024),
make_tuple(&vpx_fdct32x32_c, &vpx_idct32x32_1024_add_c,
&vpx_idct32x32_1024_add_neon, TX_32X32, 135),
make_tuple(&vpx_fdct32x32_c, &vpx_idct32x32_1024_add_c,
&vpx_idct32x32_34_add_neon, TX_32X32, 34),
make_tuple(&vpx_fdct32x32_c, &vpx_idct32x32_1024_add_c,
&vpx_idct32x32_1_add_neon, TX_32X32, 1),
make_tuple(&vpx_fdct16x16_c, &vpx_idct16x16_256_add_c,
&vpx_idct16x16_256_add_neon, TX_16X16, 256),
make_tuple(&vpx_fdct16x16_c, &vpx_idct16x16_256_add_c,
&vpx_idct16x16_10_add_neon, TX_16X16, 10),
make_tuple(&vpx_fdct16x16_c, &vpx_idct16x16_256_add_c,
&vpx_idct16x16_1_add_neon, TX_16X16, 1),
make_tuple(&vpx_fdct8x8_c, &vpx_idct8x8_64_add_c,
&vpx_idct8x8_64_add_neon, TX_8X8, 64),
make_tuple(&vpx_fdct8x8_c, &vpx_idct8x8_64_add_c,
&vpx_idct8x8_12_add_neon, TX_8X8, 12),
make_tuple(&vpx_fdct8x8_c, &vpx_idct8x8_64_add_c,
&vpx_idct8x8_1_add_neon, TX_8X8, 1),
make_tuple(&vpx_fdct4x4_c, &vpx_idct4x4_16_add_c,
&vpx_idct4x4_16_add_neon, TX_4X4, 16),
make_tuple(&vpx_fdct4x4_c, &vpx_idct4x4_16_add_c,
&vpx_idct4x4_1_add_neon, TX_4X4, 1)));
#endif // CONFIG_VP9_HIGHBITDEPTH
#endif // HAVE_NEON && !CONFIG_EMULATE_HARDWARE
#if HAVE_SSE2 && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
// 32x32_135_ is implemented using the 1024 version.
INSTANTIATE_TEST_CASE_P(
SSE2, PartialIDctTest,
::testing::Values(make_tuple(&vpx_fdct32x32_c, &vpx_idct32x32_1024_add_c,
&vpx_idct32x32_1024_add_sse2, TX_32X32, 1024),
make_tuple(&vpx_fdct32x32_c, &vpx_idct32x32_1024_add_c,
&vpx_idct32x32_1024_add_sse2, TX_32X32, 135),
make_tuple(&vpx_fdct32x32_c, &vpx_idct32x32_1024_add_c,
&vpx_idct32x32_34_add_sse2, TX_32X32, 34),
make_tuple(&vpx_fdct32x32_c, &vpx_idct32x32_1024_add_c,
&vpx_idct32x32_1_add_sse2, TX_32X32, 1),
make_tuple(&vpx_fdct16x16_c, &vpx_idct16x16_256_add_c,
&vpx_idct16x16_256_add_sse2, TX_16X16, 256),
make_tuple(&vpx_fdct16x16_c, &vpx_idct16x16_256_add_c,
&vpx_idct16x16_10_add_sse2, TX_16X16, 10),
make_tuple(&vpx_fdct16x16_c, &vpx_idct16x16_256_add_c,
&vpx_idct16x16_1_add_sse2, TX_16X16, 1),
make_tuple(&vpx_fdct8x8_c, &vpx_idct8x8_64_add_c,
&vpx_idct8x8_64_add_sse2, TX_8X8, 64),
make_tuple(&vpx_fdct8x8_c, &vpx_idct8x8_64_add_c,
&vpx_idct8x8_12_add_sse2, TX_8X8, 12),
make_tuple(&vpx_fdct8x8_c, &vpx_idct8x8_64_add_c,
&vpx_idct8x8_1_add_sse2, TX_8X8, 1),
make_tuple(&vpx_fdct4x4_c, &vpx_idct4x4_16_add_c,
&vpx_idct4x4_16_add_sse2, TX_4X4, 16),
make_tuple(&vpx_fdct4x4_c, &vpx_idct4x4_16_add_c,
&vpx_idct4x4_1_add_sse2, TX_4X4, 1)));
#endif // HAVE_SSE2 && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
#if HAVE_SSSE3 && ARCH_X86_64 && !CONFIG_VP9_HIGHBITDEPTH && \
!CONFIG_EMULATE_HARDWARE
INSTANTIATE_TEST_CASE_P(
SSSE3_64, PartialIDctTest,
::testing::Values(make_tuple(&vpx_fdct32x32_c, &vpx_idct32x32_1024_add_c,
&vpx_idct32x32_1024_add_ssse3, TX_32X32, 1024),
make_tuple(&vpx_fdct32x32_c, &vpx_idct32x32_1024_add_c,
&vpx_idct32x32_135_add_ssse3, TX_32X32, 135),
make_tuple(&vpx_fdct32x32_c, &vpx_idct32x32_1024_add_c,
&vpx_idct32x32_34_add_ssse3, TX_32X32, 34),
make_tuple(&vpx_fdct8x8_c, &vpx_idct8x8_64_add_c,
&vpx_idct8x8_64_add_ssse3, TX_8X8, 64),
make_tuple(&vpx_fdct8x8_c, &vpx_idct8x8_64_add_c,
&vpx_idct8x8_12_add_ssse3, TX_8X8, 12)));
#endif // HAVE_SSSE3 && ARCH_X86_64 && !CONFIG_VP9_HIGHBITDEPTH &&
// !CONFIG_EMULATE_HARDWARE
#if HAVE_MSA && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
// 32x32_135_ is implemented using the 1024 version.
INSTANTIATE_TEST_CASE_P(
MSA, PartialIDctTest,
::testing::Values(make_tuple(&vpx_fdct32x32_c, &vpx_idct32x32_1024_add_c,
&vpx_idct32x32_1024_add_msa, TX_32X32, 1024),
make_tuple(&vpx_fdct32x32_c, &vpx_idct32x32_1024_add_c,
&vpx_idct32x32_1024_add_msa, TX_32X32, 135),
make_tuple(&vpx_fdct32x32_c, &vpx_idct32x32_1024_add_c,
&vpx_idct32x32_34_add_msa, TX_32X32, 34),
make_tuple(&vpx_fdct32x32_c, &vpx_idct32x32_1024_add_c,
&vpx_idct32x32_1_add_msa, TX_32X32, 1),
make_tuple(&vpx_fdct16x16_c, &vpx_idct16x16_256_add_c,
&vpx_idct16x16_256_add_msa, TX_16X16, 256),
make_tuple(&vpx_fdct16x16_c, &vpx_idct16x16_256_add_c,
&vpx_idct16x16_10_add_msa, TX_16X16, 10),
make_tuple(&vpx_fdct16x16_c, &vpx_idct16x16_256_add_c,
&vpx_idct16x16_1_add_msa, TX_16X16, 1),
make_tuple(&vpx_fdct8x8_c, &vpx_idct8x8_64_add_c,
&vpx_idct8x8_64_add_msa, TX_8X8, 64),
make_tuple(&vpx_fdct8x8_c, &vpx_idct8x8_64_add_c,
&vpx_idct8x8_12_add_msa, TX_8X8, 10),
make_tuple(&vpx_fdct8x8_c, &vpx_idct8x8_64_add_c,
&vpx_idct8x8_1_add_msa, TX_8X8, 1),
make_tuple(&vpx_fdct4x4_c, &vpx_idct4x4_16_add_c,
&vpx_idct4x4_16_add_msa, TX_4X4, 16),
make_tuple(&vpx_fdct4x4_c, &vpx_idct4x4_16_add_c,
&vpx_idct4x4_1_add_msa, TX_4X4, 1)));
#endif // HAVE_MSA && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
} // namespace