From 7a41610581672ae367e2b598ef8ab0f9db36fe45 Mon Sep 17 00:00:00 2001 From: Linfeng Zhang Date: Fri, 5 Jan 2018 09:40:19 -0800 Subject: [PATCH] Update dct_test.cc Make 8-bit functions testing available in high bitdepth. Change-Id: Ic030c75aa4c6b649c52426abb4bb2122882de0fe --- test/dct_test.cc | 910 +++++++++++++++++++---------------- vp9/vp9_common.mk | 31 +- vpx_dsp/vpx_dsp_rtcd_defs.pl | 4 +- 3 files changed, 511 insertions(+), 434 deletions(-) diff --git a/test/dct_test.cc b/test/dct_test.cc index addbdfb46..5b228ff73 100644 --- a/test/dct_test.cc +++ b/test/dct_test.cc @@ -40,10 +40,53 @@ typedef void (*FhtFuncRef)(const Buffer &in, Buffer *out, int size, int tx_type); typedef void (*IhtFunc)(const tran_low_t *in, uint8_t *out, int stride, int tx_type); +typedef void (*IhtWithBdFunc)(const tran_low_t *in, uint8_t *out, int stride, + int tx_type, int bd); + +template +void fdct_wrapper(const int16_t *in, tran_low_t *out, int stride, int tx_type) { + (void)tx_type; + fn(in, out, stride); +} + +template +void idct_wrapper(const tran_low_t *in, uint8_t *out, int stride, int tx_type, + int bd) { + (void)tx_type; + (void)bd; + fn(in, out, stride); +} + +template +void iht_wrapper(const tran_low_t *in, uint8_t *out, int stride, int tx_type, + int bd) { + (void)bd; + fn(in, out, stride, tx_type); +} + +#if CONFIG_VP9_HIGHBITDEPTH +typedef void (*HighbdIdctFunc)(const tran_low_t *in, uint16_t *out, int stride, + int bd); + +typedef void (*HighbdIhtFunc)(const tran_low_t *in, uint16_t *out, int stride, + int tx_type, int bd); + +template +void highbd_idct_wrapper(const tran_low_t *in, uint8_t *out, int stride, + int tx_type, int bd) { + (void)tx_type; + fn(in, CAST_TO_SHORTPTR(out), stride, bd); +} + +template +void highbd_iht_wrapper(const tran_low_t *in, uint8_t *out, int stride, + int tx_type, int bd) { + fn(in, CAST_TO_SHORTPTR(out), stride, tx_type, bd); +} +#endif // CONFIG_VP9_HIGHBITDEPTH /* forward transform, inverse transform, size, transform type, bit depth */ -typedef tuple DctParam; -typedef tuple HtParam; +typedef tuple DctParam; void fdct_ref(const Buffer &in, Buffer *out, int size, int /*tx_type*/) { @@ -81,56 +124,76 @@ void fwht_ref(const Buffer &in, Buffer *out, int size, vp9_fwht4x4_c(in.TopLeftPixel(), out->TopLeftPixel(), in.stride()); } -#if CONFIG_VP9_HIGHBITDEPTH -#define idctNxN(n, coeffs, bitdepth) \ - void idct##n##x##n##_##bitdepth(const tran_low_t *in, uint8_t *out, \ - int stride) { \ - vpx_highbd_idct##n##x##n##_##coeffs##_add_c(in, CAST_TO_SHORTPTR(out), \ - stride, bitdepth); \ - } - -idctNxN(4, 16, 10); -idctNxN(4, 16, 12); -idctNxN(8, 64, 10); -idctNxN(8, 64, 12); -idctNxN(16, 256, 10); -idctNxN(16, 256, 12); -idctNxN(32, 1024, 10); -idctNxN(32, 1024, 12); - -#define ihtNxN(n, coeffs, bitdepth) \ - void iht##n##x##n##_##bitdepth(const tran_low_t *in, uint8_t *out, \ - int stride, int tx_type) { \ - vp9_highbd_iht##n##x##n##_##coeffs##_add_c(in, CAST_TO_SHORTPTR(out), \ - stride, tx_type, bitdepth); \ - } - -ihtNxN(4, 16, 10); -ihtNxN(4, 16, 12); -ihtNxN(8, 64, 10); -ihtNxN(8, 64, 12); -ihtNxN(16, 256, 10); -// ihtNxN(16, 256, 12); - -void iwht4x4_10(const tran_low_t *in, uint8_t *out, int stride) { - vpx_highbd_iwht4x4_16_add_c(in, CAST_TO_SHORTPTR(out), stride, 10); -} - -void iwht4x4_12(const tran_low_t *in, uint8_t *out, int stride) { - vpx_highbd_iwht4x4_16_add_c(in, CAST_TO_SHORTPTR(out), stride, 12); -} -#endif // CONFIG_VP9_HIGHBITDEPTH - -class TransTestBase { +class TransTestBase : public ::testing::TestWithParam { public: - virtual void TearDown() { libvpx_test::ClearSystemState(); } + virtual void SetUp() { + rnd_.Reset(ACMRandom::DeterministicSeed()); + fwd_txfm_ = GET_PARAM(0); + inv_txfm_ = GET_PARAM(1); + size_ = GET_PARAM(2); + tx_type_ = GET_PARAM(3); + bit_depth_ = GET_PARAM(4); + pixel_size_ = GET_PARAM(5); + max_pixel_value_ = (1 << bit_depth_) - 1; + + // Randomize stride_ to a value less than or equal to 1024 + stride_ = rnd_(1024) + 1; + if (stride_ < size_) { + stride_ = size_; + } + // Align stride_ to 16 if it's bigger than 16. + if (stride_ > 16) { + stride_ &= ~15; + } + + block_size_ = size_ * stride_; + + src_ = reinterpret_cast( + vpx_memalign(16, pixel_size_ * block_size_)); + ASSERT_TRUE(src_ != NULL); + dst_ = reinterpret_cast( + vpx_memalign(16, pixel_size_ * block_size_)); + ASSERT_TRUE(dst_ != NULL); + } + + virtual void TearDown() { + vpx_free(src_); + src_ = NULL; + vpx_free(dst_); + dst_ = NULL; + libvpx_test::ClearSystemState(); + } + + void InitMem() { + if (pixel_size_ == 1) { + for (int j = 0; j < block_size_; ++j) { + src_[j] = rnd_.Rand16() & max_pixel_value_; + } + for (int j = 0; j < block_size_; ++j) { + dst_[j] = rnd_.Rand16() & max_pixel_value_; + } + } else { + ASSERT_EQ(pixel_size_, 2); + uint16_t *const src = reinterpret_cast(src_); + uint16_t *const dst = reinterpret_cast(dst_); + for (int j = 0; j < block_size_; ++j) { + src[j] = rnd_.Rand16() & max_pixel_value_; + } + for (int j = 0; j < block_size_; ++j) { + dst[j] = rnd_.Rand16() & max_pixel_value_; + } + } + } + + void RunFwdTxfm(const Buffer &in, Buffer *out) { + fwd_txfm_(in.TopLeftPixel(), out->TopLeftPixel(), in.stride(), tx_type_); + } + + void RunInvTxfm(const Buffer &in, uint8_t *out) { + inv_txfm_(in.TopLeftPixel(), out, stride_, tx_type_, bit_depth_); + } protected: - virtual void RunFwdTxfm(const Buffer &in, - Buffer *out) = 0; - - virtual void RunInvTxfm(const Buffer &in, uint8_t *out) = 0; - void RunAccuracyCheck(int limit) { ACMRandom rnd(ACMRandom::DeterministicSeed()); Buffer test_input_block = @@ -139,70 +202,40 @@ class TransTestBase { Buffer test_temp_block = Buffer(size_, size_, 0, 16); ASSERT_TRUE(test_temp_block.Init()); - Buffer dst = Buffer(size_, size_, 0, 16); - ASSERT_TRUE(dst.Init()); - Buffer src = Buffer(size_, size_, 0, 16); - ASSERT_TRUE(src.Init()); -#if CONFIG_VP9_HIGHBITDEPTH - Buffer dst16 = Buffer(size_, size_, 0, 16); - ASSERT_TRUE(dst16.Init()); - Buffer src16 = Buffer(size_, size_, 0, 16); - ASSERT_TRUE(src16.Init()); -#endif // CONFIG_VP9_HIGHBITDEPTH uint32_t max_error = 0; int64_t total_error = 0; const int count_test_block = 10000; for (int i = 0; i < count_test_block; ++i) { - if (bit_depth_ == 8) { - src.Set(&rnd, &ACMRandom::Rand8); - dst.Set(&rnd, &ACMRandom::Rand8); - // Initialize a test block with input range [-255, 255]. - for (int h = 0; h < size_; ++h) { - for (int w = 0; w < size_; ++w) { + InitMem(); + for (int h = 0; h < size_; ++h) { + for (int w = 0; w < size_; ++w) { + if (pixel_size_ == 1) { test_input_block.TopLeftPixel()[h * test_input_block.stride() + w] = - src.TopLeftPixel()[h * src.stride() + w] - - dst.TopLeftPixel()[h * dst.stride() + w]; + src_[h * stride_ + w] - dst_[h * stride_ + w]; + } else { + ASSERT_EQ(pixel_size_, 2); + const uint16_t *const src = reinterpret_cast(src_); + const uint16_t *const dst = reinterpret_cast(dst_); + test_input_block.TopLeftPixel()[h * test_input_block.stride() + w] = + src[h * stride_ + w] - dst[h * stride_ + w]; } } -#if CONFIG_VP9_HIGHBITDEPTH - } else { - src16.Set(&rnd, 0, max_pixel_value_); - dst16.Set(&rnd, 0, max_pixel_value_); - for (int h = 0; h < size_; ++h) { - for (int w = 0; w < size_; ++w) { - test_input_block.TopLeftPixel()[h * test_input_block.stride() + w] = - src16.TopLeftPixel()[h * src16.stride() + w] - - dst16.TopLeftPixel()[h * dst16.stride() + w]; - } - } -#endif // CONFIG_VP9_HIGHBITDEPTH } ASM_REGISTER_STATE_CHECK(RunFwdTxfm(test_input_block, &test_temp_block)); - if (bit_depth_ == VPX_BITS_8) { - ASM_REGISTER_STATE_CHECK( - RunInvTxfm(test_temp_block, dst.TopLeftPixel())); -#if CONFIG_VP9_HIGHBITDEPTH - } else { - ASM_REGISTER_STATE_CHECK( - RunInvTxfm(test_temp_block, CAST_TO_BYTEPTR(dst16.TopLeftPixel()))); -#endif // CONFIG_VP9_HIGHBITDEPTH - } + ASM_REGISTER_STATE_CHECK(RunInvTxfm(test_temp_block, dst_)); for (int h = 0; h < size_; ++h) { for (int w = 0; w < size_; ++w) { int diff; -#if CONFIG_VP9_HIGHBITDEPTH - if (bit_depth_ != 8) { - diff = dst16.TopLeftPixel()[h * dst16.stride() + w] - - src16.TopLeftPixel()[h * src16.stride() + w]; + if (pixel_size_ == 1) { + diff = dst_[h * stride_ + w] - src_[h * stride_ + w]; } else { -#endif // CONFIG_VP9_HIGHBITDEPTH - diff = dst.TopLeftPixel()[h * dst.stride() + w] - - src.TopLeftPixel()[h * src.stride() + w]; -#if CONFIG_VP9_HIGHBITDEPTH + ASSERT_EQ(pixel_size_, 2); + const uint16_t *const src = reinterpret_cast(src_); + const uint16_t *const dst = reinterpret_cast(dst_); + diff = dst[h * stride_ + w] - src[h * stride_ + w]; } -#endif // CONFIG_VP9_HIGHBITDEPTH const uint32_t error = diff * diff; if (max_error < error) max_error = error; total_error += error; @@ -314,57 +347,39 @@ class TransTestBase { ASSERT_TRUE(src16.Init()); for (int i = 0; i < count_test_block; ++i) { + InitMem(); // Initialize a test block with input range [-max_pixel_value_, // max_pixel_value_]. - if (bit_depth_ == VPX_BITS_8) { - src.Set(&rnd, &ACMRandom::Rand8); - dst.Set(&rnd, &ACMRandom::Rand8); - for (int h = 0; h < size_; ++h) { - for (int w = 0; w < size_; ++w) { + for (int h = 0; h < size_; ++h) { + for (int w = 0; w < size_; ++w) { + if (pixel_size_ == 1) { in.TopLeftPixel()[h * in.stride() + w] = - src.TopLeftPixel()[h * src.stride() + w] - - dst.TopLeftPixel()[h * dst.stride() + w]; + src_[h * stride_ + w] - dst_[h * stride_ + w]; + } else { + ASSERT_EQ(pixel_size_, 2); + const uint16_t *const src = reinterpret_cast(src_); + const uint16_t *const dst = reinterpret_cast(dst_); + in.TopLeftPixel()[h * in.stride() + w] = + src[h * stride_ + w] - dst[h * stride_ + w]; } } -#if CONFIG_VP9_HIGHBITDEPTH - } else { - src16.Set(&rnd, 0, max_pixel_value_); - dst16.Set(&rnd, 0, max_pixel_value_); - for (int h = 0; h < size_; ++h) { - for (int w = 0; w < size_; ++w) { - in.TopLeftPixel()[h * in.stride() + w] = - src16.TopLeftPixel()[h * src16.stride() + w] - - dst16.TopLeftPixel()[h * dst16.stride() + w]; - } - } -#endif // CONFIG_VP9_HIGHBITDEPTH } fwd_txfm_ref(in, &coeff, size_, tx_type_); - if (bit_depth_ == VPX_BITS_8) { - ASM_REGISTER_STATE_CHECK(RunInvTxfm(coeff, dst.TopLeftPixel())); -#if CONFIG_VP9_HIGHBITDEPTH - } else { - ASM_REGISTER_STATE_CHECK( - RunInvTxfm(coeff, CAST_TO_BYTEPTR(dst16.TopLeftPixel()))); -#endif // CONFIG_VP9_HIGHBITDEPTH - } + ASM_REGISTER_STATE_CHECK(RunInvTxfm(coeff, dst_)); for (int h = 0; h < size_; ++h) { for (int w = 0; w < size_; ++w) { int diff; -#if CONFIG_VP9_HIGHBITDEPTH - if (bit_depth_ != 8) { - diff = dst16.TopLeftPixel()[h * dst16.stride() + w] - - src16.TopLeftPixel()[h * src16.stride() + w]; + if (pixel_size_ == 1) { + diff = dst_[h * stride_ + w] - src_[h * stride_ + w]; } else { -#endif // CONFIG_VP9_HIGHBITDEPTH - diff = dst.TopLeftPixel()[h * dst.stride() + w] - - src.TopLeftPixel()[h * src.stride() + w]; -#if CONFIG_VP9_HIGHBITDEPTH + ASSERT_EQ(pixel_size_, 2); + const uint16_t *const src = reinterpret_cast(src_); + const uint16_t *const dst = reinterpret_cast(dst_); + diff = dst[h * stride_ + w] - src[h * stride_ + w]; } -#endif // CONFIG_VP9_HIGHBITDEPTH const uint32_t error = diff * diff; EXPECT_GE(static_cast(limit), error) << "Error: " << size_ << "x" << size_ << " IDCT has error " @@ -374,37 +389,26 @@ class TransTestBase { } } + FhtFunc fwd_txfm_; FhtFuncRef fwd_txfm_ref; + IhtWithBdFunc inv_txfm_; + ACMRandom rnd_; + uint8_t *src_; + uint8_t *dst_; vpx_bit_depth_t bit_depth_; int tx_type_; int max_pixel_value_; int size_; + int stride_; + int pixel_size_; + int block_size_; }; -class TransDCT : public TransTestBase, - public ::testing::TestWithParam { +/* -------------------------------------------------------------------------- */ + +class TransDCT : public TransTestBase { public: - TransDCT() { - fwd_txfm_ref = fdct_ref; - fwd_txfm_ = GET_PARAM(0); - inv_txfm_ = GET_PARAM(1); - size_ = GET_PARAM(2); - tx_type_ = GET_PARAM(3); - bit_depth_ = GET_PARAM(4); - max_pixel_value_ = (1 << bit_depth_) - 1; - } - - protected: - void RunFwdTxfm(const Buffer &in, Buffer *out) { - fwd_txfm_(in.TopLeftPixel(), out->TopLeftPixel(), in.stride()); - } - - void RunInvTxfm(const Buffer &in, uint8_t *out) { - inv_txfm_(in.TopLeftPixel(), out, in.stride()); - } - - FdctFunc fwd_txfm_; - IdctFunc inv_txfm_; + TransDCT() { fwd_txfm_ref = fdct_ref; } }; TEST_P(TransDCT, AccuracyCheck) { RunAccuracyCheck(1); } @@ -415,174 +419,182 @@ TEST_P(TransDCT, MemCheck) { RunMemCheck(); } TEST_P(TransDCT, InvAccuracyCheck) { RunInvAccuracyCheck(1); } +/* TODO:(johannkoenig) Determine why these fail AccuracyCheck + make_tuple(&fdct_wrapper, + &highbd_idct_wrapper, 32, 0, VPX_BITS_12, 2), + make_tuple(&fdct_wrapper, + &highbd_idct_wrapper, 16, 0, VPX_BITS_12, 2), + make_tuple(&fdct_wrapper, + &highbd_idct_wrapper, 32, 0, VPX_BITS_12, + 2), + make_tuple(&fdct_wrapper, + &highbd_idct_wrapper, 16, 0, VPX_BITS_12, + 2), +*/ + +const DctParam c_dct_tests[] = { #if CONFIG_VP9_HIGHBITDEPTH -INSTANTIATE_TEST_CASE_P( - C, TransDCT, - ::testing::Values( - make_tuple(&vpx_highbd_fdct32x32_c, &idct32x32_10, 32, 0, VPX_BITS_10), - make_tuple(&vpx_highbd_fdct32x32_c, &idct32x32_12, 32, 0, VPX_BITS_10), - make_tuple(&vpx_fdct32x32_c, &vpx_idct32x32_1024_add_c, 32, 0, - VPX_BITS_8), - make_tuple(&vpx_highbd_fdct16x16_c, &idct16x16_10, 16, 0, VPX_BITS_10), - make_tuple(&vpx_highbd_fdct16x16_c, &idct16x16_12, 16, 0, VPX_BITS_10), - make_tuple(&vpx_fdct16x16_c, &vpx_idct16x16_256_add_c, 16, 0, - VPX_BITS_8), - make_tuple(&vpx_highbd_fdct8x8_c, &idct8x8_10, 8, 0, VPX_BITS_10), - make_tuple(&vpx_highbd_fdct8x8_c, &idct8x8_12, 8, 0, VPX_BITS_10), - make_tuple(&vpx_fdct8x8_c, &vpx_idct8x8_64_add_c, 8, 0, VPX_BITS_8), - make_tuple(&vpx_highbd_fdct4x4_c, &idct4x4_10, 4, 0, VPX_BITS_10), - make_tuple(&vpx_highbd_fdct4x4_c, &idct4x4_12, 4, 0, VPX_BITS_12), - make_tuple(&vpx_fdct4x4_c, &vpx_idct4x4_16_add_c, 4, 0, VPX_BITS_8))); -#else -INSTANTIATE_TEST_CASE_P( - C, TransDCT, - ::testing::Values( - make_tuple(&vpx_fdct32x32_c, &vpx_idct32x32_1024_add_c, 32, 0, - VPX_BITS_8), - make_tuple(&vpx_fdct16x16_c, &vpx_idct16x16_256_add_c, 16, 0, - VPX_BITS_8), - make_tuple(&vpx_fdct8x8_c, &vpx_idct8x8_64_add_c, 8, 0, VPX_BITS_8), - make_tuple(&vpx_fdct4x4_c, &vpx_idct4x4_16_add_c, 4, 0, VPX_BITS_8))); + make_tuple(&fdct_wrapper, + &highbd_idct_wrapper, 32, 0, + VPX_BITS_8, 2), + make_tuple(&fdct_wrapper, + &highbd_idct_wrapper, 32, 0, + VPX_BITS_10, 2), + make_tuple(&fdct_wrapper, + &highbd_idct_wrapper, 16, 0, + VPX_BITS_8, 2), + make_tuple(&fdct_wrapper, + &highbd_idct_wrapper, 16, 0, + VPX_BITS_10, 2), + make_tuple(&fdct_wrapper, + &highbd_idct_wrapper, 8, 0, + VPX_BITS_8, 2), + make_tuple(&fdct_wrapper, + &highbd_idct_wrapper, 8, 0, + VPX_BITS_10, 2), + make_tuple(&fdct_wrapper, + &highbd_idct_wrapper, 8, 0, + VPX_BITS_12, 2), + make_tuple(&fdct_wrapper, + &highbd_idct_wrapper, 4, 0, + VPX_BITS_8, 2), + make_tuple(&fdct_wrapper, + &highbd_idct_wrapper, 4, 0, + VPX_BITS_10, 2), + make_tuple(&fdct_wrapper, + &highbd_idct_wrapper, 4, 0, + VPX_BITS_12, 2), #endif // CONFIG_VP9_HIGHBITDEPTH + make_tuple(&fdct_wrapper, + &idct_wrapper, 32, 0, VPX_BITS_8, 1), + make_tuple(&fdct_wrapper, + &idct_wrapper, 16, 0, VPX_BITS_8, 1), + make_tuple(&fdct_wrapper, &idct_wrapper, + 8, 0, VPX_BITS_8, 1), + make_tuple(&fdct_wrapper, &idct_wrapper, + 4, 0, VPX_BITS_8, 1) +}; + +INSTANTIATE_TEST_CASE_P(C, TransDCT, ::testing::ValuesIn(c_dct_tests)); + +#if !CONFIG_EMULATE_HARDWARE #if HAVE_SSE2 -#if !CONFIG_EMULATE_HARDWARE +const DctParam sse2_dct_tests[] = { #if CONFIG_VP9_HIGHBITDEPTH -/* TODO:(johannkoenig) Determine why these fail AccuracyCheck - make_tuple(&vpx_highbd_fdct32x32_sse2, &idct32x32_12, 32, 0, VPX_BITS_12), - make_tuple(&vpx_highbd_fdct16x16_sse2, &idct16x16_12, 16, 0, VPX_BITS_12), -*/ -INSTANTIATE_TEST_CASE_P( - SSE2, TransDCT, - ::testing::Values( - make_tuple(&vpx_highbd_fdct32x32_sse2, &idct32x32_10, 32, 0, - VPX_BITS_10), - make_tuple(&vpx_fdct32x32_sse2, &vpx_idct32x32_1024_add_sse2, 32, 0, - VPX_BITS_8), - make_tuple(&vpx_highbd_fdct16x16_sse2, &idct16x16_10, 16, 0, - VPX_BITS_10), - make_tuple(&vpx_fdct16x16_sse2, &vpx_idct16x16_256_add_sse2, 16, 0, - VPX_BITS_8), - make_tuple(&vpx_highbd_fdct8x8_sse2, &idct8x8_10, 8, 0, VPX_BITS_10), - make_tuple(&vpx_highbd_fdct8x8_sse2, &idct8x8_12, 8, 0, VPX_BITS_12), - make_tuple(&vpx_fdct8x8_sse2, &vpx_idct8x8_64_add_sse2, 8, 0, - VPX_BITS_8), - make_tuple(&vpx_highbd_fdct4x4_sse2, &idct4x4_10, 4, 0, VPX_BITS_10), - make_tuple(&vpx_highbd_fdct4x4_sse2, &idct4x4_12, 4, 0, VPX_BITS_12), - make_tuple(&vpx_fdct4x4_sse2, &vpx_idct4x4_16_add_sse2, 4, 0, - VPX_BITS_8))); -#else -INSTANTIATE_TEST_CASE_P( - SSE2, TransDCT, - ::testing::Values(make_tuple(&vpx_fdct32x32_sse2, - &vpx_idct32x32_1024_add_sse2, 32, 0, - VPX_BITS_8), - make_tuple(&vpx_fdct16x16_sse2, - &vpx_idct16x16_256_add_sse2, 16, 0, - VPX_BITS_8), - make_tuple(&vpx_fdct8x8_sse2, &vpx_idct8x8_64_add_sse2, 8, - 0, VPX_BITS_8), - make_tuple(&vpx_fdct4x4_sse2, &vpx_idct4x4_16_add_sse2, 4, - 0, VPX_BITS_8))); + make_tuple(&fdct_wrapper, + &highbd_idct_wrapper, 32, 0, + VPX_BITS_8, 2), + make_tuple(&fdct_wrapper, + &highbd_idct_wrapper, 32, 0, + VPX_BITS_10, 2), + make_tuple(&fdct_wrapper, + &highbd_idct_wrapper, 16, 0, + VPX_BITS_8, 2), + make_tuple(&fdct_wrapper, + &highbd_idct_wrapper, 16, 0, + VPX_BITS_10, 2), + make_tuple(&fdct_wrapper, + &highbd_idct_wrapper, 8, 0, + VPX_BITS_8, 2), + make_tuple(&fdct_wrapper, + &highbd_idct_wrapper, 8, 0, + VPX_BITS_10, 2), + make_tuple(&fdct_wrapper, + &highbd_idct_wrapper, 8, 0, + VPX_BITS_12, 2), + make_tuple(&fdct_wrapper, + &highbd_idct_wrapper, 4, 0, + VPX_BITS_8, 2), + make_tuple(&fdct_wrapper, + &highbd_idct_wrapper, 4, 0, + VPX_BITS_10, 2), + make_tuple(&fdct_wrapper, + &highbd_idct_wrapper, 4, 0, + VPX_BITS_12, 2), #endif // CONFIG_VP9_HIGHBITDEPTH -#endif // !CONFIG_EMULATE_HARDWARE + make_tuple(&fdct_wrapper, + &idct_wrapper, 32, 0, VPX_BITS_8, 1), + make_tuple(&fdct_wrapper, + &idct_wrapper, 16, 0, VPX_BITS_8, 1), + make_tuple(&fdct_wrapper, + &idct_wrapper, 8, 0, VPX_BITS_8, 1), + make_tuple(&fdct_wrapper, + &idct_wrapper, 4, 0, VPX_BITS_8, 1) +}; + +INSTANTIATE_TEST_CASE_P(SSE2, TransDCT, ::testing::ValuesIn(sse2_dct_tests)); #endif // HAVE_SSE2 -#if !CONFIG_VP9_HIGHBITDEPTH -#if HAVE_SSSE3 && !CONFIG_EMULATE_HARDWARE -#if !ARCH_X86_64 +#if HAVE_SSSE3 && !CONFIG_VP9_HIGHBITDEPTH && ARCH_X86_64 // TODO(johannkoenig): high bit depth fdct8x8. INSTANTIATE_TEST_CASE_P( SSSE3, TransDCT, - ::testing::Values(make_tuple(&vpx_fdct32x32_c, &vpx_idct32x32_1024_add_sse2, - 32, 0, VPX_BITS_8), - make_tuple(&vpx_fdct8x8_c, &vpx_idct8x8_64_add_sse2, 8, 0, - VPX_BITS_8))); -#else -// vpx_fdct8x8_ssse3 is only available in 64 bit builds. -INSTANTIATE_TEST_CASE_P( - SSSE3, TransDCT, - ::testing::Values(make_tuple(&vpx_fdct32x32_c, &vpx_idct32x32_1024_add_sse2, - 32, 0, VPX_BITS_8), - make_tuple(&vpx_fdct8x8_ssse3, &vpx_idct8x8_64_add_sse2, - 8, 0, VPX_BITS_8))); -#endif // !ARCH_X86_64 -#endif // HAVE_SSSE3 && !CONFIG_EMULATE_HARDWARE -#endif // !CONFIG_VP9_HIGHBITDEPTH + ::testing::Values( + // vpx_fdct8x8_ssse3 is only available in 64 bit builds. + make_tuple(&fdct_wrapper, + &idct_wrapper, 8, 0, VPX_BITS_8, + 1))); +#endif // HAVE_SSSE3 && !CONFIG_VP9_HIGHBITDEPTH && ARCH_X86_64 -#if !CONFIG_VP9_HIGHBITDEPTH && HAVE_AVX2 && !CONFIG_EMULATE_HARDWARE +#if HAVE_AVX2 && !CONFIG_VP9_HIGHBITDEPTH // TODO(johannkoenig): high bit depth fdct32x32. INSTANTIATE_TEST_CASE_P( - AVX2, TransDCT, ::testing::Values(make_tuple(&vpx_fdct32x32_avx2, - &vpx_idct32x32_1024_add_sse2, - 32, 0, VPX_BITS_8))); - -#endif // !CONFIG_VP9_HIGHBITDEPTH && HAVE_AVX2 && !CONFIG_EMULATE_HARDWARE + AVX2, TransDCT, + ::testing::Values(make_tuple(&fdct_wrapper, + &idct_wrapper, 32, + 0, VPX_BITS_8, 1))); +#endif // HAVE_AVX2 && !CONFIG_VP9_HIGHBITDEPTH #if HAVE_NEON -#if !CONFIG_EMULATE_HARDWARE INSTANTIATE_TEST_CASE_P( NEON, TransDCT, - ::testing::Values(make_tuple(&vpx_fdct32x32_neon, - &vpx_idct32x32_1024_add_neon, 32, 0, - VPX_BITS_8), - make_tuple(&vpx_fdct16x16_neon, - &vpx_idct16x16_256_add_neon, 16, 0, - VPX_BITS_8), - make_tuple(&vpx_fdct8x8_neon, &vpx_idct8x8_64_add_neon, 8, - 0, VPX_BITS_8), - make_tuple(&vpx_fdct4x4_neon, &vpx_idct4x4_16_add_neon, 4, - 0, VPX_BITS_8))); -#endif // !CONFIG_EMULATE_HARDWARE + ::testing::Values(make_tuple(&fdct_wrapper, + &idct_wrapper, 32, + 0, VPX_BITS_8, 1), + make_tuple(&fdct_wrapper, + &idct_wrapper, 16, + 0, VPX_BITS_8, 1), + make_tuple(&fdct_wrapper, + &idct_wrapper, 8, 0, + VPX_BITS_8, 1), + make_tuple(&fdct_wrapper, + &idct_wrapper, 4, 0, + VPX_BITS_8, 1))); #endif // HAVE_NEON -#if HAVE_MSA -#if !CONFIG_VP9_HIGHBITDEPTH -#if !CONFIG_EMULATE_HARDWARE +#if HAVE_MSA && !CONFIG_VP9_HIGHBITDEPTH INSTANTIATE_TEST_CASE_P( MSA, TransDCT, - ::testing::Values( - make_tuple(&vpx_fdct32x32_msa, &vpx_idct32x32_1024_add_msa, 32, 0, - VPX_BITS_8), - make_tuple(&vpx_fdct16x16_msa, &vpx_idct16x16_256_add_msa, 16, 0, - VPX_BITS_8), - make_tuple(&vpx_fdct8x8_msa, &vpx_idct8x8_64_add_msa, 8, 0, VPX_BITS_8), - make_tuple(&vpx_fdct4x4_msa, &vpx_idct4x4_16_add_msa, 4, 0, - VPX_BITS_8))); + ::testing::Values(make_tuple(&fdct_wrapper, + &idct_wrapper, 32, + 0, VPX_BITS_8, 1), + make_tuple(&fdct_wrapper, + &idct_wrapper, 16, + 0, VPX_BITS_8, 1), + make_tuple(&fdct_wrapper, + &idct_wrapper, 8, 0, + VPX_BITS_8, 1), + make_tuple(&fdct_wrapper, + &idct_wrapper, 4, 0, + VPX_BITS_8, 1))); +#endif // HAVE_MSA && !CONFIG_VP9_HIGHBITDEPTH + +#if HAVE_VSX && !CONFIG_VP9_HIGHBITDEPTH +INSTANTIATE_TEST_CASE_P( + VSX, TransDCT, + ::testing::Values(make_tuple(&fdct_wrapper, + &idct_wrapper, 4, 0, + VPX_BITS_8, 1))); +#endif // HAVE_VSX && !CONFIG_VP9_HIGHBITDEPTH && + #endif // !CONFIG_EMULATE_HARDWARE -#endif // !CONFIG_VP9_HIGHBITDEPTH -#endif // HAVE_MSA -#if HAVE_VSX && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE -INSTANTIATE_TEST_CASE_P(VSX, TransDCT, - ::testing::Values(make_tuple(&vpx_fdct4x4_c, - &vpx_idct4x4_16_add_vsx, 4, - 0, VPX_BITS_8))); -#endif // HAVE_VSX && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE +/* -------------------------------------------------------------------------- */ -class TransHT : public TransTestBase, public ::testing::TestWithParam { +class TransHT : public TransTestBase { public: - TransHT() { - fwd_txfm_ref = fht_ref; - fwd_txfm_ = GET_PARAM(0); - inv_txfm_ = GET_PARAM(1); - size_ = GET_PARAM(2); - tx_type_ = GET_PARAM(3); - bit_depth_ = GET_PARAM(4); - max_pixel_value_ = (1 << bit_depth_) - 1; - } - - protected: - void RunFwdTxfm(const Buffer &in, Buffer *out) { - fwd_txfm_(in.TopLeftPixel(), out->TopLeftPixel(), in.stride(), tx_type_); - } - - void RunInvTxfm(const Buffer &in, uint8_t *out) { - inv_txfm_(in.TopLeftPixel(), out, in.stride(), tx_type_); - } - - FhtFunc fwd_txfm_; - IhtFunc inv_txfm_; + TransHT() { fwd_txfm_ref = fht_ref; } }; TEST_P(TransHT, AccuracyCheck) { RunAccuracyCheck(1); } @@ -594,116 +606,183 @@ TEST_P(TransHT, MemCheck) { RunMemCheck(); } TEST_P(TransHT, InvAccuracyCheck) { RunInvAccuracyCheck(1); } /* TODO:(johannkoenig) Determine why these fail AccuracyCheck - make_tuple(&vp9_highbd_fht16x16_c, &iht16x16_12, 16, 0, VPX_BITS_12), - make_tuple(&vp9_highbd_fht16x16_c, &iht16x16_12, 16, 1, VPX_BITS_12), - make_tuple(&vp9_highbd_fht16x16_c, &iht16x16_12, 16, 2, VPX_BITS_12), - make_tuple(&vp9_highbd_fht16x16_c, &iht16x16_12, 16, 3, VPX_BITS_12), + make_tuple(&vp9_highbd_fht16x16_c, + &highbd_iht_wrapper, 16, 0, VPX_BITS_12, 2), + make_tuple(&vp9_highbd_fht16x16_c, + &highbd_iht_wrapper, 16, 1, VPX_BITS_12, 2), + make_tuple(&vp9_highbd_fht16x16_c, + &highbd_iht_wrapper, 16, 2, VPX_BITS_12, 2), + make_tuple(&vp9_highbd_fht16x16_c, + &highbd_iht_wrapper, 16, 3, VPX_BITS_12, 2), */ + +const DctParam c_ht_tests[] = { #if CONFIG_VP9_HIGHBITDEPTH -INSTANTIATE_TEST_CASE_P( - C, TransHT, - ::testing::Values( - make_tuple(&vp9_highbd_fht16x16_c, &iht16x16_10, 16, 0, VPX_BITS_10), - make_tuple(&vp9_highbd_fht16x16_c, &iht16x16_10, 16, 1, VPX_BITS_10), - make_tuple(&vp9_highbd_fht16x16_c, &iht16x16_10, 16, 2, VPX_BITS_10), - make_tuple(&vp9_highbd_fht16x16_c, &iht16x16_10, 16, 3, VPX_BITS_10), - make_tuple(&vp9_fht16x16_c, &vp9_iht16x16_256_add_c, 16, 0, VPX_BITS_8), - make_tuple(&vp9_fht16x16_c, &vp9_iht16x16_256_add_c, 16, 1, VPX_BITS_8), - make_tuple(&vp9_fht16x16_c, &vp9_iht16x16_256_add_c, 16, 2, VPX_BITS_8), - make_tuple(&vp9_fht16x16_c, &vp9_iht16x16_256_add_c, 16, 3, VPX_BITS_8), - make_tuple(&vp9_highbd_fht8x8_c, &iht8x8_10, 8, 0, VPX_BITS_10), - make_tuple(&vp9_highbd_fht8x8_c, &iht8x8_10, 8, 1, VPX_BITS_10), - make_tuple(&vp9_highbd_fht8x8_c, &iht8x8_10, 8, 2, VPX_BITS_10), - make_tuple(&vp9_highbd_fht8x8_c, &iht8x8_10, 8, 3, VPX_BITS_10), - make_tuple(&vp9_highbd_fht8x8_c, &iht8x8_12, 8, 0, VPX_BITS_12), - make_tuple(&vp9_highbd_fht8x8_c, &iht8x8_12, 8, 1, VPX_BITS_12), - make_tuple(&vp9_highbd_fht8x8_c, &iht8x8_12, 8, 2, VPX_BITS_12), - make_tuple(&vp9_highbd_fht8x8_c, &iht8x8_12, 8, 3, VPX_BITS_12), - make_tuple(&vp9_fht8x8_c, &vp9_iht8x8_64_add_c, 8, 0, VPX_BITS_8), - make_tuple(&vp9_fht8x8_c, &vp9_iht8x8_64_add_c, 8, 1, VPX_BITS_8), - make_tuple(&vp9_fht8x8_c, &vp9_iht8x8_64_add_c, 8, 2, VPX_BITS_8), - make_tuple(&vp9_fht8x8_c, &vp9_iht8x8_64_add_c, 8, 3, VPX_BITS_8), - make_tuple(&vp9_highbd_fht4x4_c, &iht4x4_10, 4, 0, VPX_BITS_10), - make_tuple(&vp9_highbd_fht4x4_c, &iht4x4_10, 4, 1, VPX_BITS_10), - make_tuple(&vp9_highbd_fht4x4_c, &iht4x4_10, 4, 2, VPX_BITS_10), - make_tuple(&vp9_highbd_fht4x4_c, &iht4x4_10, 4, 3, VPX_BITS_10), - make_tuple(&vp9_highbd_fht4x4_c, &iht4x4_12, 4, 0, VPX_BITS_12), - make_tuple(&vp9_highbd_fht4x4_c, &iht4x4_12, 4, 1, VPX_BITS_12), - make_tuple(&vp9_highbd_fht4x4_c, &iht4x4_12, 4, 2, VPX_BITS_12), - make_tuple(&vp9_highbd_fht4x4_c, &iht4x4_12, 4, 3, VPX_BITS_12), - make_tuple(&vp9_fht4x4_c, &vp9_iht4x4_16_add_c, 4, 0, VPX_BITS_8), - make_tuple(&vp9_fht4x4_c, &vp9_iht4x4_16_add_c, 4, 1, VPX_BITS_8), - make_tuple(&vp9_fht4x4_c, &vp9_iht4x4_16_add_c, 4, 2, VPX_BITS_8), - make_tuple(&vp9_fht4x4_c, &vp9_iht4x4_16_add_c, 4, 3, VPX_BITS_8))); -#else -INSTANTIATE_TEST_CASE_P( - C, TransHT, - ::testing::Values( - make_tuple(&vp9_fht16x16_c, &vp9_iht16x16_256_add_c, 16, 0, VPX_BITS_8), - make_tuple(&vp9_fht16x16_c, &vp9_iht16x16_256_add_c, 16, 1, VPX_BITS_8), - make_tuple(&vp9_fht16x16_c, &vp9_iht16x16_256_add_c, 16, 2, VPX_BITS_8), - make_tuple(&vp9_fht16x16_c, &vp9_iht16x16_256_add_c, 16, 3, VPX_BITS_8), + make_tuple(&vp9_highbd_fht16x16_c, + &highbd_iht_wrapper, 16, 0, + VPX_BITS_8, 2), + make_tuple(&vp9_highbd_fht16x16_c, + &highbd_iht_wrapper, 16, 1, + VPX_BITS_8, 2), + make_tuple(&vp9_highbd_fht16x16_c, + &highbd_iht_wrapper, 16, 2, + VPX_BITS_8, 2), + make_tuple(&vp9_highbd_fht16x16_c, + &highbd_iht_wrapper, 16, 3, + VPX_BITS_8, 2), + make_tuple(&vp9_highbd_fht16x16_c, + &highbd_iht_wrapper, 16, 0, + VPX_BITS_10, 2), + make_tuple(&vp9_highbd_fht16x16_c, + &highbd_iht_wrapper, 16, 1, + VPX_BITS_10, 2), + make_tuple(&vp9_highbd_fht16x16_c, + &highbd_iht_wrapper, 16, 2, + VPX_BITS_10, 2), + make_tuple(&vp9_highbd_fht16x16_c, + &highbd_iht_wrapper, 16, 3, + VPX_BITS_10, 2), + make_tuple(&vp9_highbd_fht8x8_c, + &highbd_iht_wrapper, 8, 0, VPX_BITS_8, + 2), + make_tuple(&vp9_highbd_fht8x8_c, + &highbd_iht_wrapper, 8, 1, VPX_BITS_8, + 2), + make_tuple(&vp9_highbd_fht8x8_c, + &highbd_iht_wrapper, 8, 2, VPX_BITS_8, + 2), + make_tuple(&vp9_highbd_fht8x8_c, + &highbd_iht_wrapper, 8, 3, VPX_BITS_8, + 2), + make_tuple(&vp9_highbd_fht8x8_c, + &highbd_iht_wrapper, 8, 0, VPX_BITS_10, + 2), + make_tuple(&vp9_highbd_fht8x8_c, + &highbd_iht_wrapper, 8, 1, VPX_BITS_10, + 2), + make_tuple(&vp9_highbd_fht8x8_c, + &highbd_iht_wrapper, 8, 2, VPX_BITS_10, + 2), + make_tuple(&vp9_highbd_fht8x8_c, + &highbd_iht_wrapper, 8, 3, VPX_BITS_10, + 2), + make_tuple(&vp9_highbd_fht8x8_c, + &highbd_iht_wrapper, 8, 0, VPX_BITS_12, + 2), + make_tuple(&vp9_highbd_fht8x8_c, + &highbd_iht_wrapper, 8, 1, VPX_BITS_12, + 2), + make_tuple(&vp9_highbd_fht8x8_c, + &highbd_iht_wrapper, 8, 2, VPX_BITS_12, + 2), + make_tuple(&vp9_highbd_fht8x8_c, + &highbd_iht_wrapper, 8, 3, VPX_BITS_12, + 2), - make_tuple(&vp9_fht8x8_c, &vp9_iht8x8_64_add_c, 8, 0, VPX_BITS_8), - make_tuple(&vp9_fht8x8_c, &vp9_iht8x8_64_add_c, 8, 1, VPX_BITS_8), - make_tuple(&vp9_fht8x8_c, &vp9_iht8x8_64_add_c, 8, 2, VPX_BITS_8), - make_tuple(&vp9_fht8x8_c, &vp9_iht8x8_64_add_c, 8, 3, VPX_BITS_8), - - make_tuple(&vp9_fht4x4_c, &vp9_iht4x4_16_add_c, 4, 0, VPX_BITS_8), - make_tuple(&vp9_fht4x4_c, &vp9_iht4x4_16_add_c, 4, 1, VPX_BITS_8), - make_tuple(&vp9_fht4x4_c, &vp9_iht4x4_16_add_c, 4, 2, VPX_BITS_8), - make_tuple(&vp9_fht4x4_c, &vp9_iht4x4_16_add_c, 4, 3, VPX_BITS_8))); + make_tuple(&vp9_highbd_fht4x4_c, + &highbd_iht_wrapper, 4, 0, VPX_BITS_8, + 2), + make_tuple(&vp9_highbd_fht4x4_c, + &highbd_iht_wrapper, 4, 1, VPX_BITS_8, + 2), + make_tuple(&vp9_highbd_fht4x4_c, + &highbd_iht_wrapper, 4, 2, VPX_BITS_8, + 2), + make_tuple(&vp9_highbd_fht4x4_c, + &highbd_iht_wrapper, 4, 3, VPX_BITS_8, + 2), + make_tuple(&vp9_highbd_fht4x4_c, + &highbd_iht_wrapper, 4, 0, VPX_BITS_10, + 2), + make_tuple(&vp9_highbd_fht4x4_c, + &highbd_iht_wrapper, 4, 1, VPX_BITS_10, + 2), + make_tuple(&vp9_highbd_fht4x4_c, + &highbd_iht_wrapper, 4, 2, VPX_BITS_10, + 2), + make_tuple(&vp9_highbd_fht4x4_c, + &highbd_iht_wrapper, 4, 3, VPX_BITS_10, + 2), + make_tuple(&vp9_highbd_fht4x4_c, + &highbd_iht_wrapper, 4, 0, VPX_BITS_12, + 2), + make_tuple(&vp9_highbd_fht4x4_c, + &highbd_iht_wrapper, 4, 1, VPX_BITS_12, + 2), + make_tuple(&vp9_highbd_fht4x4_c, + &highbd_iht_wrapper, 4, 2, VPX_BITS_12, + 2), + make_tuple(&vp9_highbd_fht4x4_c, + &highbd_iht_wrapper, 4, 3, VPX_BITS_12, + 2), #endif // CONFIG_VP9_HIGHBITDEPTH + make_tuple(&vp9_fht16x16_c, &iht_wrapper, 16, 0, + VPX_BITS_8, 1), + make_tuple(&vp9_fht16x16_c, &iht_wrapper, 16, 1, + VPX_BITS_8, 1), + make_tuple(&vp9_fht16x16_c, &iht_wrapper, 16, 2, + VPX_BITS_8, 1), + make_tuple(&vp9_fht16x16_c, &iht_wrapper, 16, 3, + VPX_BITS_8, 1), -#if HAVE_SSE2 + make_tuple(&vp9_fht8x8_c, &iht_wrapper, 8, 0, VPX_BITS_8, + 1), + make_tuple(&vp9_fht8x8_c, &iht_wrapper, 8, 1, VPX_BITS_8, + 1), + make_tuple(&vp9_fht8x8_c, &iht_wrapper, 8, 2, VPX_BITS_8, + 1), + make_tuple(&vp9_fht8x8_c, &iht_wrapper, 8, 3, VPX_BITS_8, + 1), + + make_tuple(&vp9_fht4x4_c, &iht_wrapper, 4, 0, VPX_BITS_8, + 1), + make_tuple(&vp9_fht4x4_c, &iht_wrapper, 4, 1, VPX_BITS_8, + 1), + make_tuple(&vp9_fht4x4_c, &iht_wrapper, 4, 2, VPX_BITS_8, + 1), + make_tuple(&vp9_fht4x4_c, &iht_wrapper, 4, 3, VPX_BITS_8, + 1) +}; + +INSTANTIATE_TEST_CASE_P(C, TransHT, ::testing::ValuesIn(c_ht_tests)); + +#if HAVE_SSE2 && !CONFIG_EMULATE_HARDWARE INSTANTIATE_TEST_CASE_P( SSE2, TransHT, ::testing::Values( - make_tuple(&vp9_fht16x16_sse2, &vp9_iht16x16_256_add_sse2, 16, 0, - VPX_BITS_8), - make_tuple(&vp9_fht16x16_sse2, &vp9_iht16x16_256_add_sse2, 16, 1, - VPX_BITS_8), - make_tuple(&vp9_fht16x16_sse2, &vp9_iht16x16_256_add_sse2, 16, 2, - VPX_BITS_8), - make_tuple(&vp9_fht16x16_sse2, &vp9_iht16x16_256_add_sse2, 16, 3, - VPX_BITS_8), + make_tuple(&vp9_fht16x16_sse2, &iht_wrapper, + 16, 0, VPX_BITS_8, 1), + make_tuple(&vp9_fht16x16_sse2, &iht_wrapper, + 16, 1, VPX_BITS_8, 1), + make_tuple(&vp9_fht16x16_sse2, &iht_wrapper, + 16, 2, VPX_BITS_8, 1), + make_tuple(&vp9_fht16x16_sse2, &iht_wrapper, + 16, 3, VPX_BITS_8, 1), - make_tuple(&vp9_fht8x8_sse2, &vp9_iht8x8_64_add_sse2, 8, 0, VPX_BITS_8), - make_tuple(&vp9_fht8x8_sse2, &vp9_iht8x8_64_add_sse2, 8, 1, VPX_BITS_8), - make_tuple(&vp9_fht8x8_sse2, &vp9_iht8x8_64_add_sse2, 8, 2, VPX_BITS_8), - make_tuple(&vp9_fht8x8_sse2, &vp9_iht8x8_64_add_sse2, 8, 3, VPX_BITS_8), + make_tuple(&vp9_fht8x8_sse2, &iht_wrapper, 8, 0, + VPX_BITS_8, 1), + make_tuple(&vp9_fht8x8_sse2, &iht_wrapper, 8, 1, + VPX_BITS_8, 1), + make_tuple(&vp9_fht8x8_sse2, &iht_wrapper, 8, 2, + VPX_BITS_8, 1), + make_tuple(&vp9_fht8x8_sse2, &iht_wrapper, 8, 3, + VPX_BITS_8, 1), - make_tuple(&vp9_fht4x4_sse2, &vp9_iht4x4_16_add_sse2, 4, 0, VPX_BITS_8), - make_tuple(&vp9_fht4x4_sse2, &vp9_iht4x4_16_add_sse2, 4, 1, VPX_BITS_8), - make_tuple(&vp9_fht4x4_sse2, &vp9_iht4x4_16_add_sse2, 4, 2, VPX_BITS_8), - make_tuple(&vp9_fht4x4_sse2, &vp9_iht4x4_16_add_sse2, 4, 3, - VPX_BITS_8))); -#endif // HAVE_SSE2 + make_tuple(&vp9_fht4x4_sse2, &iht_wrapper, 4, 0, + VPX_BITS_8, 1), + make_tuple(&vp9_fht4x4_sse2, &iht_wrapper, 4, 1, + VPX_BITS_8, 1), + make_tuple(&vp9_fht4x4_sse2, &iht_wrapper, 4, 2, + VPX_BITS_8, 1), + make_tuple(&vp9_fht4x4_sse2, &iht_wrapper, 4, 3, + VPX_BITS_8, 1))); +#endif // HAVE_SSE2 && !CONFIG_EMULATE_HARDWARE -class TransWHT : public TransTestBase, - public ::testing::TestWithParam { +/* -------------------------------------------------------------------------- */ + +class TransWHT : public TransTestBase { public: - TransWHT() { - fwd_txfm_ref = fwht_ref; - fwd_txfm_ = GET_PARAM(0); - inv_txfm_ = GET_PARAM(1); - size_ = GET_PARAM(2); - tx_type_ = GET_PARAM(3); - bit_depth_ = GET_PARAM(4); - max_pixel_value_ = (1 << bit_depth_) - 1; - } - - protected: - void RunFwdTxfm(const Buffer &in, Buffer *out) { - fwd_txfm_(in.TopLeftPixel(), out->TopLeftPixel(), in.stride()); - } - - void RunInvTxfm(const Buffer &in, uint8_t *out) { - inv_txfm_(in.TopLeftPixel(), out, in.stride()); - } - - FdctFunc fwd_txfm_; - IdctFunc inv_txfm_; + TransWHT() { fwd_txfm_ref = fwht_ref; } }; TEST_P(TransWHT, AccuracyCheck) { RunAccuracyCheck(0); } @@ -714,24 +793,29 @@ TEST_P(TransWHT, MemCheck) { RunMemCheck(); } TEST_P(TransWHT, InvAccuracyCheck) { RunInvAccuracyCheck(0); } +const DctParam c_wht_tests[] = { #if CONFIG_VP9_HIGHBITDEPTH -INSTANTIATE_TEST_CASE_P( - C, TransWHT, - ::testing::Values( - make_tuple(&vp9_highbd_fwht4x4_c, &iwht4x4_10, 4, 0, VPX_BITS_10), - make_tuple(&vp9_highbd_fwht4x4_c, &iwht4x4_12, 4, 0, VPX_BITS_12), - make_tuple(&vp9_fwht4x4_c, &vpx_iwht4x4_16_add_c, 4, 0, VPX_BITS_8))); -#else -INSTANTIATE_TEST_CASE_P(C, TransWHT, - ::testing::Values(make_tuple(&vp9_fwht4x4_c, - &vpx_iwht4x4_16_add_c, 4, - 0, VPX_BITS_8))); + make_tuple(&fdct_wrapper, + &highbd_idct_wrapper, 4, 0, + VPX_BITS_8, 2), + make_tuple(&fdct_wrapper, + &highbd_idct_wrapper, 4, 0, + VPX_BITS_10, 2), + make_tuple(&fdct_wrapper, + &highbd_idct_wrapper, 4, 0, + VPX_BITS_12, 2), #endif // CONFIG_VP9_HIGHBITDEPTH + make_tuple(&fdct_wrapper, &idct_wrapper, + 4, 0, VPX_BITS_8, 1) +}; -#if HAVE_SSE2 -INSTANTIATE_TEST_CASE_P(SSE2, TransWHT, - ::testing::Values(make_tuple(&vp9_fwht4x4_sse2, - &vpx_iwht4x4_16_add_sse2, - 4, 0, VPX_BITS_8))); -#endif // HAVE_SSE2 +INSTANTIATE_TEST_CASE_P(C, TransWHT, ::testing::ValuesIn(c_wht_tests)); + +#if HAVE_SSE2 && !CONFIG_EMULATE_HARDWARE +INSTANTIATE_TEST_CASE_P( + SSE2, TransWHT, + ::testing::Values(make_tuple(&fdct_wrapper, + &idct_wrapper, 4, 0, + VPX_BITS_8, 1))); +#endif // HAVE_SSE2 && !CONFIG_EMULATE_HARDWARE } // namespace diff --git a/vp9/vp9_common.mk b/vp9/vp9_common.mk index 5bfc0d359..2fb9a5507 100644 --- a/vp9/vp9_common.mk +++ b/vp9/vp9_common.mk @@ -63,30 +63,23 @@ VP9_COMMON_SRCS-$(CONFIG_VP9_POSTPROC) += common/vp9_postproc.h VP9_COMMON_SRCS-$(CONFIG_VP9_POSTPROC) += common/vp9_postproc.c VP9_COMMON_SRCS-$(CONFIG_VP9_POSTPROC) += common/vp9_mfqe.h VP9_COMMON_SRCS-$(CONFIG_VP9_POSTPROC) += common/vp9_mfqe.c + +VP9_COMMON_SRCS-$(HAVE_MSA) += common/mips/msa/vp9_idct4x4_msa.c +VP9_COMMON_SRCS-$(HAVE_MSA) += common/mips/msa/vp9_idct8x8_msa.c +VP9_COMMON_SRCS-$(HAVE_MSA) += common/mips/msa/vp9_idct16x16_msa.c +VP9_COMMON_SRCS-$(HAVE_SSE2) += common/x86/vp9_idct_intrin_sse2.c + ifeq ($(CONFIG_VP9_POSTPROC),yes) +VP9_COMMON_SRCS-$(HAVE_MSA) += common/mips/msa/vp9_mfqe_msa.c VP9_COMMON_SRCS-$(HAVE_SSE2) += common/x86/vp9_mfqe_sse2.asm endif ifneq ($(CONFIG_VP9_HIGHBITDEPTH),yes) -VP9_COMMON_SRCS-$(HAVE_DSPR2) += common/mips/dspr2/vp9_itrans4_dspr2.c -VP9_COMMON_SRCS-$(HAVE_DSPR2) += common/mips/dspr2/vp9_itrans8_dspr2.c -VP9_COMMON_SRCS-$(HAVE_DSPR2) += common/mips/dspr2/vp9_itrans16_dspr2.c -endif - -# common (msa) -VP9_COMMON_SRCS-$(HAVE_MSA) += common/mips/msa/vp9_idct4x4_msa.c -VP9_COMMON_SRCS-$(HAVE_MSA) += common/mips/msa/vp9_idct8x8_msa.c -VP9_COMMON_SRCS-$(HAVE_MSA) += common/mips/msa/vp9_idct16x16_msa.c - -ifeq ($(CONFIG_VP9_POSTPROC),yes) -VP9_COMMON_SRCS-$(HAVE_MSA) += common/mips/msa/vp9_mfqe_msa.c -endif - -VP9_COMMON_SRCS-$(HAVE_SSE2) += common/x86/vp9_idct_intrin_sse2.c - -ifneq ($(CONFIG_VP9_HIGHBITDEPTH),yes) -VP9_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/vp9_iht4x4_add_neon.c -VP9_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/vp9_iht8x8_add_neon.c +VP9_COMMON_SRCS-$(HAVE_DSPR2) += common/mips/dspr2/vp9_itrans4_dspr2.c +VP9_COMMON_SRCS-$(HAVE_DSPR2) += common/mips/dspr2/vp9_itrans8_dspr2.c +VP9_COMMON_SRCS-$(HAVE_DSPR2) += common/mips/dspr2/vp9_itrans16_dspr2.c +VP9_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/vp9_iht4x4_add_neon.c +VP9_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/vp9_iht8x8_add_neon.c endif $(eval $(call rtcd_h_template,vp9_rtcd,vp9/common/vp9_rtcd_defs.pl)) diff --git a/vpx_dsp/vpx_dsp_rtcd_defs.pl b/vpx_dsp/vpx_dsp_rtcd_defs.pl index 1a743d910..41ba20f67 100644 --- a/vpx_dsp/vpx_dsp_rtcd_defs.pl +++ b/vpx_dsp/vpx_dsp_rtcd_defs.pl @@ -626,6 +626,7 @@ if (vpx_config("CONFIG_EMULATE_HARDWARE") ne "yes") { specialize qw/vpx_idct32x32_135_add neon sse2 ssse3/; specialize qw/vpx_idct32x32_34_add neon sse2 ssse3/; specialize qw/vpx_idct32x32_1_add neon sse2/; + specialize qw/vpx_iwht4x4_16_add sse2/; if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") ne "yes") { # Note that these specializations are appended to the above ones. @@ -646,7 +647,7 @@ if (vpx_config("CONFIG_EMULATE_HARDWARE") ne "yes") { $vpx_idct32x32_135_add_msa=vpx_idct32x32_1024_add_msa; specialize qw/vpx_idct32x32_34_add dspr2 msa/; specialize qw/vpx_idct32x32_1_add dspr2 msa/; - specialize qw/vpx_iwht4x4_16_add msa sse2/; + specialize qw/vpx_iwht4x4_16_add msa/; specialize qw/vpx_iwht4x4_1_add msa/; } # !CONFIG_VP9_HIGHBITDEPTH } # !CONFIG_EMULATE_HARDWARE @@ -654,7 +655,6 @@ if (vpx_config("CONFIG_EMULATE_HARDWARE") ne "yes") { if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") { # Note as optimized versions of these functions are added we need to add a check to ensure # that when CONFIG_EMULATE_HARDWARE is on, it defaults to the C versions only. - specialize qw/vpx_iwht4x4_16_add sse2/; add_proto qw/void vpx_highbd_idct4x4_16_add/, "const tran_low_t *input, uint16_t *dest, int stride, int bd"; add_proto qw/void vpx_highbd_idct4x4_1_add/, "const tran_low_t *input, uint16_t *dest, int stride, int bd";