quantize test: test _fp_ version of quantize
None of the x86 optimizations pass the tests. Change-Id: Ic67f2ba1977b657e68f2a13b0711fc5fcbafd909
This commit is contained in:
parent
13eed991f9
commit
661efeca97
@ -45,6 +45,28 @@ typedef std::tr1::tuple<QuantizeFunc, QuantizeFunc, vpx_bit_depth_t,
|
|||||||
int /*max_size*/>
|
int /*max_size*/>
|
||||||
QuantizeParam;
|
QuantizeParam;
|
||||||
|
|
||||||
|
// Wrapper for FP version which does not use zbin or quant_shift.
|
||||||
|
typedef void (*QuantizeFPFunc)(const tran_low_t *coeff, intptr_t count,
|
||||||
|
int skip_block, const int16_t *round,
|
||||||
|
const int16_t *quant, tran_low_t *qcoeff,
|
||||||
|
tran_low_t *dqcoeff, const int16_t *dequant,
|
||||||
|
uint16_t *eob, const int16_t *scan,
|
||||||
|
const int16_t *iscan);
|
||||||
|
|
||||||
|
template <QuantizeFPFunc fn>
|
||||||
|
void QuantFPWrapper(const tran_low_t *coeff, intptr_t count, int skip_block,
|
||||||
|
const int16_t *zbin, const int16_t *round,
|
||||||
|
const int16_t *quant, const int16_t *quant_shift,
|
||||||
|
tran_low_t *qcoeff, tran_low_t *dqcoeff,
|
||||||
|
const int16_t *dequant, uint16_t *eob, const int16_t *scan,
|
||||||
|
const int16_t *iscan) {
|
||||||
|
(void)zbin;
|
||||||
|
(void)quant_shift;
|
||||||
|
|
||||||
|
fn(coeff, count, skip_block, round, quant, qcoeff, dqcoeff, dequant, eob,
|
||||||
|
scan, iscan);
|
||||||
|
}
|
||||||
|
|
||||||
class VP9QuantizeBase {
|
class VP9QuantizeBase {
|
||||||
public:
|
public:
|
||||||
VP9QuantizeBase(vpx_bit_depth_t bit_depth, int max_size)
|
VP9QuantizeBase(vpx_bit_depth_t bit_depth, int max_size)
|
||||||
@ -146,13 +168,13 @@ TEST_P(VP9QuantizeTest, OperationCheck) {
|
|||||||
const int skip_block = 0;
|
const int skip_block = 0;
|
||||||
TX_SIZE sz;
|
TX_SIZE sz;
|
||||||
if (max_size_ == 16) {
|
if (max_size_ == 16) {
|
||||||
sz = (TX_SIZE)(i % 3); // TX_4X4, TX_8X8 TX_16X16
|
sz = static_cast<TX_SIZE>(i % 3); // TX_4X4, TX_8X8 TX_16X16
|
||||||
} else {
|
} else {
|
||||||
sz = TX_32X32;
|
sz = TX_32X32;
|
||||||
}
|
}
|
||||||
const TX_TYPE tx_type = (TX_TYPE)((i >> 2) % 3);
|
const TX_TYPE tx_type = static_cast<TX_TYPE>((i >> 2) % 3);
|
||||||
const scan_order *scan_order = &vp9_scan_orders[sz][tx_type];
|
const scan_order *scan_order = &vp9_scan_orders[sz][tx_type];
|
||||||
const int count = (4 << sz) * (4 << sz); // 16, 64, 256
|
const int count = (4 << sz) * (4 << sz);
|
||||||
coeff.Set(&rnd, -max_value_, max_value_);
|
coeff.Set(&rnd, -max_value_, max_value_);
|
||||||
GenerateHelperArrays(&rnd, zbin_ptr_, round_ptr_, quant_ptr_,
|
GenerateHelperArrays(&rnd, zbin_ptr_, round_ptr_, quant_ptr_,
|
||||||
quant_shift_ptr_, dequant_ptr_);
|
quant_shift_ptr_, dequant_ptr_);
|
||||||
@ -162,6 +184,7 @@ TEST_P(VP9QuantizeTest, OperationCheck) {
|
|||||||
ref_qcoeff.TopLeftPixel(), ref_dqcoeff.TopLeftPixel(),
|
ref_qcoeff.TopLeftPixel(), ref_dqcoeff.TopLeftPixel(),
|
||||||
dequant_ptr_, &ref_eob, scan_order->scan,
|
dequant_ptr_, &ref_eob, scan_order->scan,
|
||||||
scan_order->iscan);
|
scan_order->iscan);
|
||||||
|
|
||||||
ASM_REGISTER_STATE_CHECK(
|
ASM_REGISTER_STATE_CHECK(
|
||||||
quantize_op_(coeff.TopLeftPixel(), count, skip_block, zbin_ptr_,
|
quantize_op_(coeff.TopLeftPixel(), count, skip_block, zbin_ptr_,
|
||||||
round_ptr_, quant_ptr_, quant_shift_ptr_,
|
round_ptr_, quant_ptr_, quant_shift_ptr_,
|
||||||
@ -197,16 +220,16 @@ TEST_P(VP9QuantizeTest, EOBCheck) {
|
|||||||
uint16_t eob, ref_eob;
|
uint16_t eob, ref_eob;
|
||||||
|
|
||||||
for (int i = 0; i < number_of_iterations; ++i) {
|
for (int i = 0; i < number_of_iterations; ++i) {
|
||||||
int skip_block = 0;
|
const int skip_block = 0;
|
||||||
TX_SIZE sz;
|
TX_SIZE sz;
|
||||||
if (max_size_ == 16) {
|
if (max_size_ == 16) {
|
||||||
sz = (TX_SIZE)(i % 3); // TX_4X4, TX_8X8 TX_16X16
|
sz = static_cast<TX_SIZE>(i % 3); // TX_4X4, TX_8X8 TX_16X16
|
||||||
} else {
|
} else {
|
||||||
sz = TX_32X32;
|
sz = TX_32X32;
|
||||||
}
|
}
|
||||||
TX_TYPE tx_type = (TX_TYPE)((i >> 2) % 3);
|
const TX_TYPE tx_type = static_cast<TX_TYPE>((i >> 2) % 3);
|
||||||
const scan_order *scan_order = &vp9_scan_orders[sz][tx_type];
|
const scan_order *scan_order = &vp9_scan_orders[sz][tx_type];
|
||||||
int count = (4 << sz) * (4 << sz); // 16, 64, 256
|
int count = (4 << sz) * (4 << sz);
|
||||||
// Two random entries
|
// Two random entries
|
||||||
coeff.Set(0);
|
coeff.Set(0);
|
||||||
coeff.TopLeftPixel()[rnd(count)] =
|
coeff.TopLeftPixel()[rnd(count)] =
|
||||||
@ -221,6 +244,7 @@ TEST_P(VP9QuantizeTest, EOBCheck) {
|
|||||||
ref_qcoeff.TopLeftPixel(), ref_dqcoeff.TopLeftPixel(),
|
ref_qcoeff.TopLeftPixel(), ref_dqcoeff.TopLeftPixel(),
|
||||||
dequant_ptr_, &ref_eob, scan_order->scan,
|
dequant_ptr_, &ref_eob, scan_order->scan,
|
||||||
scan_order->iscan);
|
scan_order->iscan);
|
||||||
|
|
||||||
ASM_REGISTER_STATE_CHECK(
|
ASM_REGISTER_STATE_CHECK(
|
||||||
quantize_op_(coeff.TopLeftPixel(), count, skip_block, zbin_ptr_,
|
quantize_op_(coeff.TopLeftPixel(), count, skip_block, zbin_ptr_,
|
||||||
round_ptr_, quant_ptr_, quant_shift_ptr_,
|
round_ptr_, quant_ptr_, quant_shift_ptr_,
|
||||||
@ -250,16 +274,14 @@ TEST_P(VP9QuantizeTest, DISABLED_Speed) {
|
|||||||
Buffer<tran_low_t> dqcoeff = Buffer<tran_low_t>(max_size_, max_size_, 0, 32);
|
Buffer<tran_low_t> dqcoeff = Buffer<tran_low_t>(max_size_, max_size_, 0, 32);
|
||||||
ASSERT_TRUE(dqcoeff.Init());
|
ASSERT_TRUE(dqcoeff.Init());
|
||||||
uint16_t eob;
|
uint16_t eob;
|
||||||
int starting_sz, ending_sz;
|
TX_SIZE starting_sz, ending_sz;
|
||||||
|
|
||||||
if (max_size_ == 16) {
|
if (max_size_ == 16) {
|
||||||
// TX_4X4, TX_8X8 TX_16X16
|
starting_sz = TX_4X4;
|
||||||
starting_sz = 0;
|
ending_sz = TX_16X16;
|
||||||
ending_sz = 2;
|
|
||||||
} else {
|
} else {
|
||||||
// TX_32X32
|
starting_sz = TX_32X32;
|
||||||
starting_sz = 3;
|
ending_sz = TX_32X32;
|
||||||
ending_sz = 3;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
for (TX_SIZE sz = starting_sz; sz <= ending_sz; ++sz) {
|
for (TX_SIZE sz = starting_sz; sz <= ending_sz; ++sz) {
|
||||||
@ -270,17 +292,17 @@ TEST_P(VP9QuantizeTest, DISABLED_Speed) {
|
|||||||
// Pick the first one.
|
// Pick the first one.
|
||||||
const TX_TYPE tx_type = DCT_DCT;
|
const TX_TYPE tx_type = DCT_DCT;
|
||||||
const scan_order *scan_order = &vp9_scan_orders[sz][tx_type];
|
const scan_order *scan_order = &vp9_scan_orders[sz][tx_type];
|
||||||
const int count = (4 << sz) * (4 << sz); // 16, 64, 256
|
const int count = (4 << sz) * (4 << sz);
|
||||||
|
|
||||||
GenerateHelperArrays(&rnd, zbin_ptr_, round_ptr_, quant_ptr_,
|
GenerateHelperArrays(&rnd, zbin_ptr_, round_ptr_, quant_ptr_,
|
||||||
quant_shift_ptr_, dequant_ptr_);
|
quant_shift_ptr_, dequant_ptr_);
|
||||||
|
|
||||||
if (i == 0) {
|
if (i == 0) {
|
||||||
// When |coeff values| are less than zbin the results are 0.
|
// When |coeff values| are less than zbin the results are 0.
|
||||||
zbin_ptr_[0] = zbin_ptr_[1] = 100;
|
for (int j = 0; j < 8; ++j) zbin_ptr_[j] = 100;
|
||||||
coeff.Set(&rnd, -99, 99);
|
coeff.Set(&rnd, -99, 99);
|
||||||
} else if (i == 1) {
|
} else if (i == 1) {
|
||||||
zbin_ptr_[0] = zbin_ptr_[1] = 50;
|
for (int j = 0; j < 8; ++j) zbin_ptr_[j] = 50;
|
||||||
coeff.Set(&rnd, -500, 500);
|
coeff.Set(&rnd, -500, 500);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -325,12 +347,19 @@ INSTANTIATE_TEST_CASE_P(
|
|||||||
&vpx_highbd_quantize_b_32x32_c, VPX_BITS_10, 32),
|
&vpx_highbd_quantize_b_32x32_c, VPX_BITS_10, 32),
|
||||||
make_tuple(&vpx_highbd_quantize_b_32x32_sse2,
|
make_tuple(&vpx_highbd_quantize_b_32x32_sse2,
|
||||||
&vpx_highbd_quantize_b_32x32_c, VPX_BITS_12, 32)));
|
&vpx_highbd_quantize_b_32x32_c, VPX_BITS_12, 32)));
|
||||||
|
|
||||||
#else
|
#else
|
||||||
INSTANTIATE_TEST_CASE_P(SSE2, VP9QuantizeTest,
|
INSTANTIATE_TEST_CASE_P(SSE2, VP9QuantizeTest,
|
||||||
::testing::Values(make_tuple(&vpx_quantize_b_sse2,
|
::testing::Values(make_tuple(&vpx_quantize_b_sse2,
|
||||||
&vpx_quantize_b_c,
|
&vpx_quantize_b_c,
|
||||||
VPX_BITS_8, 16)));
|
VPX_BITS_8, 16)));
|
||||||
#endif // CONFIG_VP9_HIGHBITDEPTH
|
#endif // CONFIG_VP9_HIGHBITDEPTH
|
||||||
|
|
||||||
|
INSTANTIATE_TEST_CASE_P(
|
||||||
|
DISABLED_SSE2, VP9QuantizeTest,
|
||||||
|
::testing::Values(make_tuple(&QuantFPWrapper<vp9_quantize_fp_sse2>,
|
||||||
|
&QuantFPWrapper<vp9_quantize_fp_c>, VPX_BITS_8,
|
||||||
|
16)));
|
||||||
#endif // HAVE_SSE2
|
#endif // HAVE_SSE2
|
||||||
|
|
||||||
#if HAVE_SSSE3
|
#if HAVE_SSSE3
|
||||||
@ -347,7 +376,13 @@ INSTANTIATE_TEST_CASE_P(SSSE3, VP9QuantizeTest,
|
|||||||
INSTANTIATE_TEST_CASE_P(
|
INSTANTIATE_TEST_CASE_P(
|
||||||
DISABLED_SSSE3, VP9QuantizeTest,
|
DISABLED_SSSE3, VP9QuantizeTest,
|
||||||
::testing::Values(make_tuple(&vpx_quantize_b_32x32_ssse3,
|
::testing::Values(make_tuple(&vpx_quantize_b_32x32_ssse3,
|
||||||
&vpx_quantize_b_32x32_c, VPX_BITS_8, 32)));
|
&vpx_quantize_b_32x32_c, VPX_BITS_8, 32),
|
||||||
|
make_tuple(&QuantFPWrapper<vp9_quantize_fp_ssse3>,
|
||||||
|
&QuantFPWrapper<vp9_quantize_fp_c>, VPX_BITS_8,
|
||||||
|
16),
|
||||||
|
make_tuple(&QuantFPWrapper<vp9_quantize_fp_32x32_ssse3>,
|
||||||
|
&QuantFPWrapper<vp9_quantize_fp_32x32_c>,
|
||||||
|
VPX_BITS_8, 32)));
|
||||||
#endif // ARCH_X86_64
|
#endif // ARCH_X86_64
|
||||||
#endif // HAVE_SSSE3
|
#endif // HAVE_SSSE3
|
||||||
|
|
||||||
@ -358,6 +393,7 @@ INSTANTIATE_TEST_CASE_P(AVX, VP9QuantizeTest,
|
|||||||
::testing::Values(make_tuple(&vpx_quantize_b_avx,
|
::testing::Values(make_tuple(&vpx_quantize_b_avx,
|
||||||
&vpx_quantize_b_c,
|
&vpx_quantize_b_c,
|
||||||
VPX_BITS_8, 16)));
|
VPX_BITS_8, 16)));
|
||||||
|
|
||||||
INSTANTIATE_TEST_CASE_P(DISABLED_AVX, VP9QuantizeTest,
|
INSTANTIATE_TEST_CASE_P(DISABLED_AVX, VP9QuantizeTest,
|
||||||
::testing::Values(make_tuple(&vpx_quantize_b_32x32_avx,
|
::testing::Values(make_tuple(&vpx_quantize_b_32x32_avx,
|
||||||
&vpx_quantize_b_32x32_c,
|
&vpx_quantize_b_32x32_c,
|
||||||
@ -368,10 +404,12 @@ INSTANTIATE_TEST_CASE_P(DISABLED_AVX, VP9QuantizeTest,
|
|||||||
#if HAVE_NEON && !CONFIG_VP9_HIGHBITDEPTH
|
#if HAVE_NEON && !CONFIG_VP9_HIGHBITDEPTH
|
||||||
INSTANTIATE_TEST_CASE_P(
|
INSTANTIATE_TEST_CASE_P(
|
||||||
NEON, VP9QuantizeTest,
|
NEON, VP9QuantizeTest,
|
||||||
::testing::Values(make_tuple(&vpx_quantize_b_neon, &vpx_quantize_b_c,
|
::testing::Values(
|
||||||
VPX_BITS_8, 16),
|
make_tuple(&vpx_quantize_b_neon, &vpx_quantize_b_c, VPX_BITS_8, 16),
|
||||||
make_tuple(&vpx_quantize_b_32x32_neon,
|
make_tuple(&vpx_quantize_b_32x32_neon, &vpx_quantize_b_32x32_c,
|
||||||
&vpx_quantize_b_32x32_c, VPX_BITS_8, 32)));
|
VPX_BITS_8, 32),
|
||||||
|
make_tuple(&QuantFPWrapper<vp9_quantize_fp_neon>,
|
||||||
|
&QuantFPWrapper<vp9_quantize_fp_c>, VPX_BITS_8, 16)));
|
||||||
#endif // HAVE_NEON && !CONFIG_VP9_HIGHBITDEPTH
|
#endif // HAVE_NEON && !CONFIG_VP9_HIGHBITDEPTH
|
||||||
|
|
||||||
// Only useful to compare "Speed" test results.
|
// Only useful to compare "Speed" test results.
|
||||||
@ -379,5 +417,10 @@ INSTANTIATE_TEST_CASE_P(
|
|||||||
DISABLED_C, VP9QuantizeTest,
|
DISABLED_C, VP9QuantizeTest,
|
||||||
::testing::Values(
|
::testing::Values(
|
||||||
make_tuple(&vpx_quantize_b_c, &vpx_quantize_b_c, VPX_BITS_8, 16),
|
make_tuple(&vpx_quantize_b_c, &vpx_quantize_b_c, VPX_BITS_8, 16),
|
||||||
make_tuple(&vpx_quantize_b_c, &vpx_quantize_b_c, VPX_BITS_8, 32)));
|
make_tuple(&vpx_quantize_b_32x32_c, &vpx_quantize_b_32x32_c, VPX_BITS_8,
|
||||||
|
32),
|
||||||
|
make_tuple(&QuantFPWrapper<vp9_quantize_fp_c>,
|
||||||
|
&QuantFPWrapper<vp9_quantize_fp_c>, VPX_BITS_8, 16),
|
||||||
|
make_tuple(&QuantFPWrapper<vp9_quantize_fp_32x32_c>,
|
||||||
|
&QuantFPWrapper<vp9_quantize_fp_32x32_c>, VPX_BITS_8, 32)));
|
||||||
} // namespace
|
} // namespace
|
||||||
|
Loading…
x
Reference in New Issue
Block a user