From cd8cfb86750ab9dd1bbab788b2f1dee41f84e1da Mon Sep 17 00:00:00 2001
From: Yi Luo
Date: Mon, 9 May 2016 18:34:16 -0700
Subject: [PATCH] Change inverse HT function argument from TXFM_2D_CFG* to int

This change has no performance impact. It prepares the proper function
interface for better performance optimization.

Change-Id: I12e2f2deaf7f3adc603de0a74852116468c762f6
---
 test/vp10_inv_txfm2d_test.cc  |   2 +-
 test/vp10_txfm_test.h         |   6 +-
 vp10/common/idct.c            |  35 ++--------
 vp10/common/vp10_inv_txfm2d.c | 131 +++++++++++++++++++++++++++++++---
 vp10/common/vp10_rtcd_defs.pl |  10 +--
 5 files changed, 133 insertions(+), 51 deletions(-)

diff --git a/test/vp10_inv_txfm2d_test.cc b/test/vp10_inv_txfm2d_test.cc
index c3552dc62..80ac78b80 100644
--- a/test/vp10_inv_txfm2d_test.cc
+++ b/test/vp10_inv_txfm2d_test.cc
@@ -84,7 +84,7 @@ TEST(vp10_inv_txfm2d, round_trip) {
       }
 
       fwd_txfm_func(input, output, txfm_size, tx_type, bd);
-      inv_txfm_func(output, ref_input, txfm_size, inv_txfm_cfg, bd);
+      inv_txfm_func(output, ref_input, txfm_size, tx_type, bd);
 
       for (int ni = 0; ni < sqr_txfm_size; ++ni) {
         EXPECT_LE(abs(input[ni] - ref_input[ni]), 4);
diff --git a/test/vp10_txfm_test.h b/test/vp10_txfm_test.h
index 6b0bd0a98..c4d03cea6 100644
--- a/test/vp10_txfm_test.h
+++ b/test/vp10_txfm_test.h
@@ -104,10 +104,8 @@ static double compute_avg_abs_error(const Type1* a, const Type2* b,
 typedef void (*TxfmFunc)(const int32_t* in, int32_t* out, const int8_t* cos_bit,
                          const int8_t* range_bit);
 
-typedef void (*Fwd_Txfm2d_Func)(const int16_t*, int32_t*, const int,
-                                int tx_type, const int);
-typedef void (*Inv_Txfm2d_Func)(const int32_t*, uint16_t*, const int,
-                                const TXFM_2D_CFG*, const int);
+typedef void (*Fwd_Txfm2d_Func)(const int16_t*, int32_t*, int, int, int);
+typedef void (*Inv_Txfm2d_Func)(const int32_t*, uint16_t*, int, int, int);
 
 static const int bd = 10;
 static const int input_base = (1 << bd);
diff --git a/vp10/common/idct.c b/vp10/common/idct.c
index b7da81b49..ab17cca3d 100644
--- a/vp10/common/idct.c
+++ b/vp10/common/idct.c
@@ -1302,20 +1302,11 @@ void vp10_highbd_inv_txfm_add_4x4(const tran_low_t *input, uint8_t *dest,
 
   switch (tx_type) {
     case DCT_DCT:
-      vp10_inv_txfm2d_add_4x4(input, CONVERT_TO_SHORTPTR(dest), stride,
-                              &inv_txfm_2d_cfg_dct_dct_4, bd);
-      break;
     case ADST_DCT:
-      vp10_inv_txfm2d_add_4x4(input, CONVERT_TO_SHORTPTR(dest), stride,
-                              &inv_txfm_2d_cfg_adst_dct_4, bd);
-      break;
     case DCT_ADST:
-      vp10_inv_txfm2d_add_4x4(input, CONVERT_TO_SHORTPTR(dest), stride,
-                              &inv_txfm_2d_cfg_dct_adst_4, bd);
-      break;
     case ADST_ADST:
       vp10_inv_txfm2d_add_4x4(input, CONVERT_TO_SHORTPTR(dest), stride,
-                              &inv_txfm_2d_cfg_adst_adst_4, bd);
+                              tx_type, bd);
       break;
 #if CONFIG_EXT_TX
     case FLIPADST_DCT:
@@ -1350,20 +1341,11 @@ void vp10_highbd_inv_txfm_add_8x8(const tran_low_t *input, uint8_t *dest,
   (void)eob;
   switch (tx_type) {
     case DCT_DCT:
-      vp10_inv_txfm2d_add_8x8(input, CONVERT_TO_SHORTPTR(dest), stride,
-                              &inv_txfm_2d_cfg_dct_dct_8, bd);
-      break;
     case ADST_DCT:
-      vp10_inv_txfm2d_add_8x8(input, CONVERT_TO_SHORTPTR(dest), stride,
-                              &inv_txfm_2d_cfg_adst_dct_8, bd);
-      break;
     case DCT_ADST:
-      vp10_inv_txfm2d_add_8x8(input, CONVERT_TO_SHORTPTR(dest), stride,
-                              &inv_txfm_2d_cfg_dct_adst_8, bd);
-      break;
     case ADST_ADST:
       vp10_inv_txfm2d_add_8x8(input, CONVERT_TO_SHORTPTR(dest), stride,
-                              &inv_txfm_2d_cfg_adst_adst_8, bd);
+                              tx_type, bd);
       break;
 #if CONFIG_EXT_TX
     case FLIPADST_DCT:
@@ -1398,20 +1380,11 @@ void vp10_highbd_inv_txfm_add_16x16(const tran_low_t *input, uint8_t *dest,
   (void)eob;
   switch (tx_type) {
     case DCT_DCT:
-      vp10_inv_txfm2d_add_16x16(input, CONVERT_TO_SHORTPTR(dest), stride,
-                                &inv_txfm_2d_cfg_dct_dct_16, bd);
-      break;
     case ADST_DCT:
-      vp10_inv_txfm2d_add_16x16(input, CONVERT_TO_SHORTPTR(dest), stride,
-                                &inv_txfm_2d_cfg_adst_dct_16, bd);
-      break;
     case DCT_ADST:
-      vp10_inv_txfm2d_add_16x16(input, CONVERT_TO_SHORTPTR(dest), stride,
-                                &inv_txfm_2d_cfg_dct_adst_16, bd);
-      break;
     case ADST_ADST:
       vp10_inv_txfm2d_add_16x16(input, CONVERT_TO_SHORTPTR(dest), stride,
-                                &inv_txfm_2d_cfg_adst_adst_16, bd);
+                                tx_type, bd);
       break;
 #if CONFIG_EXT_TX
     case FLIPADST_DCT:
@@ -1447,7 +1420,7 @@ void vp10_highbd_inv_txfm_add_32x32(const tran_low_t *input, uint8_t *dest,
   switch (tx_type) {
     case DCT_DCT:
       vp10_inv_txfm2d_add_32x32(input, CONVERT_TO_SHORTPTR(dest), stride,
-                                &inv_txfm_2d_cfg_dct_dct_32, bd);
+                                DCT_DCT, bd);
       break;
 #if CONFIG_EXT_TX
     case ADST_DCT:
diff --git a/vp10/common/vp10_inv_txfm2d.c b/vp10/common/vp10_inv_txfm2d.c
index 5227fc8e7..3ae54c9fa 100644
--- a/vp10/common/vp10_inv_txfm2d.c
+++ b/vp10/common/vp10_inv_txfm2d.c
@@ -8,8 +8,10 @@
  * be found in the AUTHORS file in the root of the source tree.
  */
 
+#include "vp10/common/enums.h"
 #include "vp10/common/vp10_txfm.h"
 #include "vp10/common/vp10_inv_txfm1d.h"
+#include "vp10/common/vp10_inv_txfm2d_cfg.h"
 
 static INLINE TxfmFunc inv_txfm_type_to_func(TXFM_TYPE txfm_type) {
   switch (txfm_type) {
@@ -46,6 +48,110 @@ static INLINE TxfmFunc inv_txfm_type_to_func(TXFM_TYPE txfm_type) {
   }
 }
 
+// Each vp10_get_inv_txfm_NxN_cfg() helper below maps a 2D transform type to
+// the matching inverse transform configuration for that block size. An
+// unsupported tx_type trips assert(0); NULL is returned if asserts are off.
+static const TXFM_2D_CFG *vp10_get_inv_txfm_4x4_cfg(int tx_type) {
+  const TXFM_2D_CFG *cfg = NULL;
+  switch (tx_type) {
+    case DCT_DCT:
+      cfg = &inv_txfm_2d_cfg_dct_dct_4;
+      break;
+    case ADST_DCT:
+      cfg = &inv_txfm_2d_cfg_adst_dct_4;
+      break;
+    case DCT_ADST:
+      cfg = &inv_txfm_2d_cfg_dct_adst_4;
+      break;
+    case ADST_ADST:
+      cfg = &inv_txfm_2d_cfg_adst_adst_4;
+      break;
+    default:
+      assert(0);
+  }
+  return cfg;
+}
+
+static const TXFM_2D_CFG *vp10_get_inv_txfm_8x8_cfg(int tx_type) {
+  const TXFM_2D_CFG *cfg = NULL;
+  switch (tx_type) {
+    case DCT_DCT:
+      cfg = &inv_txfm_2d_cfg_dct_dct_8;
+      break;
+    case ADST_DCT:
+      cfg = &inv_txfm_2d_cfg_adst_dct_8;
+      break;
+    case DCT_ADST:
+      cfg = &inv_txfm_2d_cfg_dct_adst_8;
+      break;
+    case ADST_ADST:
+      cfg = &inv_txfm_2d_cfg_adst_adst_8;
+      break;
+    default:
+      assert(0);
+  }
+  return cfg;
+}
+
+static const TXFM_2D_CFG *vp10_get_inv_txfm_16x16_cfg(int tx_type) {
+  const TXFM_2D_CFG *cfg = NULL;
+  switch (tx_type) {
+    case DCT_DCT:
+      cfg = &inv_txfm_2d_cfg_dct_dct_16;
+      break;
+    case ADST_DCT:
+      cfg = &inv_txfm_2d_cfg_adst_dct_16;
+      break;
+    case DCT_ADST:
+      cfg = &inv_txfm_2d_cfg_dct_adst_16;
+      break;
+    case ADST_ADST:
+      cfg = &inv_txfm_2d_cfg_adst_adst_16;
+      break;
+    default:
+      assert(0);
+  }
+  return cfg;
+}
+
+static const TXFM_2D_CFG *vp10_get_inv_txfm_32x32_cfg(int tx_type) {
+  const TXFM_2D_CFG *cfg = NULL;
+  switch (tx_type) {
+    case DCT_DCT:
+      cfg = &inv_txfm_2d_cfg_dct_dct_32;
+      break;
+    case ADST_DCT:
+      cfg = &inv_txfm_2d_cfg_adst_dct_32;
+      break;
+    case DCT_ADST:
+      cfg = &inv_txfm_2d_cfg_dct_adst_32;
+      break;
+    case ADST_ADST:
+      cfg = &inv_txfm_2d_cfg_adst_adst_32;
+      break;
+    default:
+      assert(0);
+  }
+  return cfg;
+}
+
+// Only DCT_DCT has a 64x64 configuration; every other type is rejected.
+static const TXFM_2D_CFG *vp10_get_inv_txfm_64x64_cfg(int tx_type) {
+  const TXFM_2D_CFG *cfg = NULL;
+  switch (tx_type) {
+    case DCT_DCT:
+      cfg = &inv_txfm_2d_cfg_dct_dct_64;
+      break;
+    case ADST_DCT:
+    case DCT_ADST:
+    case ADST_ADST:
+    default:
+      assert(0);
+  }
+  return cfg;
+}
+
+
 static INLINE void inv_txfm2d_add_c(const int32_t *input, int16_t *output,
                                     int stride, const TXFM_2D_CFG *cfg,
                                     int32_t *txfm_buf) {
@@ -86,61 +192,66 @@ static INLINE void inv_txfm2d_add_c(const int32_t *input, int16_t *output,
 }
 
 void vp10_inv_txfm2d_add_4x4_c(const int32_t *input, uint16_t *output,
-                               const int stride, const TXFM_2D_CFG *cfg,
-                               const int bd) {
+                               int stride, int tx_type,
+                               int bd) {
   int txfm_buf[4 * 4 + 4 + 4];
   // output contains the prediction signal which is always positive and smaller
   // than (1 << bd) - 1
   // since bd < 16-1, therefore we can treat the uint16_t* output buffer as an
   // int16_t*
+  const TXFM_2D_CFG *cfg = vp10_get_inv_txfm_4x4_cfg(tx_type);
   inv_txfm2d_add_c(input, (int16_t *)output, stride, cfg, txfm_buf);
   clamp_block((int16_t *)output, 4, stride, 0, (1 << bd) - 1);
 }
 
 void vp10_inv_txfm2d_add_8x8_c(const int32_t *input, uint16_t *output,
-                               const int stride, const TXFM_2D_CFG *cfg,
-                               const int bd) {
+                               int stride, int tx_type,
+                               int bd) {
   int txfm_buf[8 * 8 + 8 + 8];
   // output contains the prediction signal which is always positive and smaller
   // than (1 << bd) - 1
   // since bd < 16-1, therefore we can treat the uint16_t* output buffer as an
   // int16_t*
+  const TXFM_2D_CFG *cfg = vp10_get_inv_txfm_8x8_cfg(tx_type);
   inv_txfm2d_add_c(input, (int16_t *)output, stride, cfg, txfm_buf);
   clamp_block((int16_t *)output, 8, stride, 0, (1 << bd) - 1);
 }
 
 void vp10_inv_txfm2d_add_16x16_c(const int32_t *input, uint16_t *output,
-                                 const int stride, const TXFM_2D_CFG *cfg,
-                                 const int bd) {
+                                 int stride, int tx_type,
+                                 int bd) {
   int txfm_buf[16 * 16 + 16 + 16];
   // output contains the prediction signal which is always positive and smaller
   // than (1 << bd) - 1
   // since bd < 16-1, therefore we can treat the uint16_t* output buffer as an
   // int16_t*
+  const TXFM_2D_CFG *cfg = vp10_get_inv_txfm_16x16_cfg(tx_type);
   inv_txfm2d_add_c(input, (int16_t *)output, stride, cfg, txfm_buf);
   clamp_block((int16_t *)output, 16, stride, 0, (1 << bd) - 1);
 }
 
 void vp10_inv_txfm2d_add_32x32_c(const int32_t *input, uint16_t *output,
-                                 const int stride, const TXFM_2D_CFG *cfg,
-                                 const int bd) {
+                                 int stride, int tx_type,
+                                 int bd) {
   int txfm_buf[32 * 32 + 32 + 32];
   // output contains the prediction signal which is always positive and smaller
   // than (1 << bd) - 1
   // since bd < 16-1, therefore we can treat the uint16_t* output buffer as an
   // int16_t*
+  const TXFM_2D_CFG *cfg = vp10_get_inv_txfm_32x32_cfg(tx_type);
   inv_txfm2d_add_c(input, (int16_t *)output, stride, cfg, txfm_buf);
   clamp_block((int16_t *)output, 32, stride, 0, (1 << bd) - 1);
 }
 
 void vp10_inv_txfm2d_add_64x64_c(const int32_t *input, uint16_t *output,
-                                 const int stride, const TXFM_2D_CFG *cfg,
-                                 const int bd) {
+                                 int stride, int tx_type,
+                                 int bd) {
   int txfm_buf[64 * 64 + 64 + 64];
   // output contains the prediction signal which is always positive and smaller
   // than (1 << bd) - 1
   // since bd < 16-1, therefore we can treat the uint16_t* output buffer as an
   // int16_t*
+  const TXFM_2D_CFG *cfg = vp10_get_inv_txfm_64x64_cfg(tx_type);
   inv_txfm2d_add_c(input, (int16_t *)output, stride, cfg, txfm_buf);
   clamp_block((int16_t *)output, 64, stride, 0, (1 << bd) - 1);
 }
diff --git a/vp10/common/vp10_rtcd_defs.pl b/vp10/common/vp10_rtcd_defs.pl
index ae0d2cb62..0e59bfe2f 100644
--- a/vp10/common/vp10_rtcd_defs.pl
+++ b/vp10/common/vp10_rtcd_defs.pl
@@ -626,15 +626,15 @@ if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") {
   specialize qw/vp10_fwd_txfm2d_64x64 sse4_1/;
 
   #inv txfm
-  add_proto qw/void vp10_inv_txfm2d_add_4x4/, "const int32_t *input, uint16_t *output, const int stride, const TXFM_2D_CFG *cfg, const int bd";
+  add_proto qw/void vp10_inv_txfm2d_add_4x4/, "const int32_t *input, uint16_t *output, int stride, int tx_type, int bd";
   specialize qw/vp10_inv_txfm2d_add_4x4/;
-  add_proto qw/void vp10_inv_txfm2d_add_8x8/, "const int32_t *input, uint16_t *output, const int stride, const TXFM_2D_CFG *cfg, const int bd";
+  add_proto qw/void vp10_inv_txfm2d_add_8x8/, "const int32_t *input, uint16_t *output, int stride, int tx_type, int bd";
   specialize qw/vp10_inv_txfm2d_add_8x8/;
-  add_proto qw/void vp10_inv_txfm2d_add_16x16/, "const int32_t *input, uint16_t *output, const int stride, const TXFM_2D_CFG *cfg, const int bd";
+  add_proto qw/void vp10_inv_txfm2d_add_16x16/, "const int32_t *input, uint16_t *output, int stride, int tx_type, int bd";
   specialize qw/vp10_inv_txfm2d_add_16x16/;
-  add_proto qw/void vp10_inv_txfm2d_add_32x32/, "const int32_t *input, uint16_t *output, const int stride, const TXFM_2D_CFG *cfg, const int bd";
+  add_proto qw/void vp10_inv_txfm2d_add_32x32/, "const int32_t *input, uint16_t *output, int stride, int tx_type, int bd";
   specialize qw/vp10_inv_txfm2d_add_32x32/;
-  add_proto qw/void vp10_inv_txfm2d_add_64x64/, "const int32_t *input, uint16_t *output, const int stride, const TXFM_2D_CFG *cfg, const int bd";
+  add_proto qw/void vp10_inv_txfm2d_add_64x64/, "const int32_t *input, uint16_t *output, int stride, int tx_type, int bd";
   specialize qw/vp10_inv_txfm2d_add_64x64/;
 }
 