Adding a 64x64 transform mode
Preliminary 64x64 transform implementation. Includes all code changes. All mismatches resolved. Coding results for derf and stdhd are within noise. stdhd is slightly higher, derf is slightly lower. To be further refined. Change-Id: I091c183f62b156d23ed6f648202eb96c82e69b4b
This commit is contained in:
parent
cf608110fc
commit
0c7a94f49b
1
configure
vendored
1
configure
vendored
@ -282,6 +282,7 @@ EXPERIMENT_LIST="
|
||||
vp9_temporal_denoising
|
||||
fp_mb_stats
|
||||
emulate_hardware
|
||||
tx64x64
|
||||
"
|
||||
CONFIG_LIST="
|
||||
external_build
|
||||
|
@ -101,22 +101,35 @@ const TX_SIZE max_txsize_lookup[BLOCK_SIZES] = {
|
||||
TX_4X4, TX_4X4, TX_4X4,
|
||||
TX_8X8, TX_8X8, TX_8X8,
|
||||
TX_16X16, TX_16X16, TX_16X16,
|
||||
TX_32X32, TX_32X32, TX_32X32, TX_32X32
|
||||
TX_32X32, TX_32X32, TX_32X32,
|
||||
#if CONFIG_TX64X64
|
||||
TX_64X64,
|
||||
#else
|
||||
TX_32X32,
|
||||
#endif
|
||||
};
|
||||
|
||||
const BLOCK_SIZE txsize_to_bsize[TX_SIZES] = {
|
||||
BLOCK_4X4, // TX_4X4
|
||||
BLOCK_8X8, // TX_8X8
|
||||
BLOCK_4X4, // TX_4X4
|
||||
BLOCK_8X8, // TX_8X8
|
||||
BLOCK_16X16, // TX_16X16
|
||||
BLOCK_32X32, // TX_32X32
|
||||
#if CONFIG_TX64X64
|
||||
BLOCK_32X32, // TX_64X64
|
||||
#endif
|
||||
};
|
||||
|
||||
const TX_SIZE tx_mode_to_biggest_tx_size[TX_MODES] = {
|
||||
TX_4X4, // ONLY_4X4
|
||||
TX_8X8, // ALLOW_8X8
|
||||
TX_4X4, // ONLY_4X4
|
||||
TX_8X8, // ALLOW_8X8
|
||||
TX_16X16, // ALLOW_16X16
|
||||
TX_32X32, // ALLOW_32X32
|
||||
#if CONFIG_TX64X64
|
||||
TX_64X64, // ALLOW_64X64
|
||||
TX_64X64, // TX_MODE_SELECT
|
||||
#else
|
||||
TX_32X32, // TX_MODE_SELECT
|
||||
#endif
|
||||
};
|
||||
|
||||
const BLOCK_SIZE ss_size_lookup[BLOCK_SIZES][2][2] = {
|
||||
|
@ -44,7 +44,7 @@ const vp9_prob vp9_cat6_prob_high12[] = {
|
||||
};
|
||||
#endif
|
||||
|
||||
const uint8_t vp9_coefband_trans_8x8plus[1024] = {
|
||||
const uint8_t vp9_coefband_trans_8x8plus[MAX_NUM_COEFS] = {
|
||||
0, 1, 1, 2, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4,
|
||||
4, 4, 4, 4, 4, 5,
|
||||
// beyond MAXBAND_INDEX+1 all values are filled as 5
|
||||
@ -111,6 +111,200 @@ const uint8_t vp9_coefband_trans_8x8plus[1024] = {
|
||||
5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
|
||||
5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
|
||||
5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
|
||||
#if CONFIG_TX64X64
|
||||
5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
|
||||
5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
|
||||
5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
|
||||
5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
|
||||
5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
|
||||
5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
|
||||
5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
|
||||
5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
|
||||
5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
|
||||
5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
|
||||
5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
|
||||
5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
|
||||
5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
|
||||
5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
|
||||
5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
|
||||
5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
|
||||
5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
|
||||
5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
|
||||
5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
|
||||
5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
|
||||
5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
|
||||
5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
|
||||
5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
|
||||
5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
|
||||
5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
|
||||
5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
|
||||
5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
|
||||
5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
|
||||
5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
|
||||
5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
|
||||
5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
|
||||
5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
|
||||
5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
|
||||
5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
|
||||
5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
|
||||
5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
|
||||
5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
|
||||
5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
|
||||
5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
|
||||
5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
|
||||
5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
|
||||
5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
|
||||
5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
|
||||
5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
|
||||
5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
|
||||
5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
|
||||
5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
|
||||
5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
|
||||
5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
|
||||
5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
|
||||
5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
|
||||
5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
|
||||
5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
|
||||
5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
|
||||
5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
|
||||
5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
|
||||
5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
|
||||
5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
|
||||
5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
|
||||
5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
|
||||
5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
|
||||
5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
|
||||
5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
|
||||
5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
|
||||
5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
|
||||
5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
|
||||
5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
|
||||
5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
|
||||
5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
|
||||
5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
|
||||
5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
|
||||
5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
|
||||
5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
|
||||
5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
|
||||
5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
|
||||
5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
|
||||
5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
|
||||
5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
|
||||
5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
|
||||
5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
|
||||
5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
|
||||
5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
|
||||
5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
|
||||
5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
|
||||
5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
|
||||
5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
|
||||
5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
|
||||
5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
|
||||
5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
|
||||
5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
|
||||
5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
|
||||
5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
|
||||
5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
|
||||
5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
|
||||
5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
|
||||
5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
|
||||
5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
|
||||
5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
|
||||
5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
|
||||
5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
|
||||
5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
|
||||
5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
|
||||
5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
|
||||
5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
|
||||
5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
|
||||
5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
|
||||
5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
|
||||
5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
|
||||
5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
|
||||
5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
|
||||
5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
|
||||
5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
|
||||
5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
|
||||
5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
|
||||
5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
|
||||
5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
|
||||
5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
|
||||
5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
|
||||
5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
|
||||
5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
|
||||
5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
|
||||
5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
|
||||
5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
|
||||
5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
|
||||
5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
|
||||
5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
|
||||
5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
|
||||
5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
|
||||
5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
|
||||
5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
|
||||
5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
|
||||
5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
|
||||
5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
|
||||
5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
|
||||
5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
|
||||
5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
|
||||
5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
|
||||
5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
|
||||
5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
|
||||
5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
|
||||
5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
|
||||
5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
|
||||
5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
|
||||
5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
|
||||
5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
|
||||
5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
|
||||
5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
|
||||
5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
|
||||
5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
|
||||
5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
|
||||
5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
|
||||
5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
|
||||
5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
|
||||
5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
|
||||
5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
|
||||
5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
|
||||
5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
|
||||
5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
|
||||
5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
|
||||
5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
|
||||
5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
|
||||
5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
|
||||
5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
|
||||
5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
|
||||
5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
|
||||
5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
|
||||
5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
|
||||
5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
|
||||
5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
|
||||
5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
|
||||
5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
|
||||
5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
|
||||
5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
|
||||
5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
|
||||
5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
|
||||
5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
|
||||
5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
|
||||
5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
|
||||
5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
|
||||
5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
|
||||
5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
|
||||
5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
|
||||
5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
|
||||
5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
|
||||
5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
|
||||
5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
|
||||
5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
|
||||
5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
|
||||
5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
|
||||
5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
|
||||
5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
|
||||
5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
|
||||
#endif
|
||||
};
|
||||
|
||||
const uint8_t vp9_coefband_trans_4x4[16] = {
|
||||
@ -736,6 +930,92 @@ static const vp9_coeff_probs_model default_coef_probs_32x32[PLANE_TYPES] = {
|
||||
}
|
||||
};
|
||||
|
||||
#if CONFIG_TX64X64
|
||||
static const vp9_coeff_probs_model default_coef_probs_64x64[PLANE_TYPES] = {
|
||||
{ // Y plane
|
||||
{ // Intra
|
||||
{ // Band 0
|
||||
{ 17, 38, 140 }, { 7, 34, 80 }, { 1, 17, 29 }
|
||||
}, { // Band 1
|
||||
{ 37, 75, 128 }, { 41, 76, 128 }, { 26, 66, 116 },
|
||||
{ 12, 52, 94 }, { 2, 32, 55 }, { 1, 10, 16 }
|
||||
}, { // Band 2
|
||||
{ 50, 127, 154 }, { 37, 109, 152 }, { 16, 82, 121 },
|
||||
{ 5, 59, 85 }, { 1, 35, 54 }, { 1, 13, 20 }
|
||||
}, { // Band 3
|
||||
{ 40, 142, 167 }, { 17, 110, 157 }, { 2, 71, 112 },
|
||||
{ 1, 44, 72 }, { 1, 27, 45 }, { 1, 11, 17 }
|
||||
}, { // Band 4
|
||||
{ 30, 175, 188 }, { 9, 124, 169 }, { 1, 74, 116 },
|
||||
{ 1, 48, 78 }, { 1, 30, 49 }, { 1, 11, 18 }
|
||||
}, { // Band 5
|
||||
{ 10, 222, 223 }, { 2, 150, 194 }, { 1, 83, 128 },
|
||||
{ 1, 48, 79 }, { 1, 27, 45 }, { 1, 11, 17 }
|
||||
}
|
||||
}, { // Inter
|
||||
{ // Band 0
|
||||
{ 36, 41, 235 }, { 29, 36, 193 }, { 10, 27, 111 }
|
||||
}, { // Band 1
|
||||
{ 85, 165, 222 }, { 177, 162, 215 }, { 110, 135, 195 },
|
||||
{ 57, 113, 168 }, { 23, 83, 120 }, { 10, 49, 61 }
|
||||
}, { // Band 2
|
||||
{ 85, 190, 223 }, { 36, 139, 200 }, { 5, 90, 146 },
|
||||
{ 1, 60, 103 }, { 1, 38, 65 }, { 1, 18, 30 }
|
||||
}, { // Band 3
|
||||
{ 72, 202, 223 }, { 23, 141, 199 }, { 2, 86, 140 },
|
||||
{ 1, 56, 97 }, { 1, 36, 61 }, { 1, 16, 27 }
|
||||
}, { // Band 4
|
||||
{ 55, 218, 225 }, { 13, 145, 200 }, { 1, 86, 141 },
|
||||
{ 1, 57, 99 }, { 1, 35, 61 }, { 1, 13, 22 }
|
||||
}, { // Band 5
|
||||
{ 15, 235, 212 }, { 1, 132, 184 }, { 1, 84, 139 },
|
||||
{ 1, 57, 97 }, { 1, 34, 56 }, { 1, 14, 23 }
|
||||
}
|
||||
}
|
||||
}, { // UV plane
|
||||
{ // Intra
|
||||
{ // Band 0
|
||||
{ 181, 21, 201 }, { 61, 37, 123 }, { 10, 38, 71 }
|
||||
}, { // Band 1
|
||||
{ 47, 106, 172 }, { 95, 104, 173 }, { 42, 93, 159 },
|
||||
{ 18, 77, 131 }, { 4, 50, 81 }, { 1, 17, 23 }
|
||||
}, { // Band 2
|
||||
{ 62, 147, 199 }, { 44, 130, 189 }, { 28, 102, 154 },
|
||||
{ 18, 75, 115 }, { 2, 44, 65 }, { 1, 12, 19 }
|
||||
}, { // Band 3
|
||||
{ 55, 153, 210 }, { 24, 130, 194 }, { 3, 93, 146 },
|
||||
{ 1, 61, 97 }, { 1, 31, 50 }, { 1, 10, 16 }
|
||||
}, { // Band 4
|
||||
{ 49, 186, 223 }, { 17, 148, 204 }, { 1, 96, 142 },
|
||||
{ 1, 53, 83 }, { 1, 26, 44 }, { 1, 11, 17 }
|
||||
}, { // Band 5
|
||||
{ 13, 217, 212 }, { 2, 136, 180 }, { 1, 78, 124 },
|
||||
{ 1, 50, 83 }, { 1, 29, 49 }, { 1, 14, 23 }
|
||||
}
|
||||
}, { // Inter
|
||||
{ // Band 0
|
||||
{ 197, 13, 247 }, { 82, 17, 222 }, { 25, 17, 162 }
|
||||
}, { // Band 1
|
||||
{ 126, 186, 247 }, { 234, 191, 243 }, { 176, 177, 234 },
|
||||
{ 104, 158, 220 }, { 66, 128, 186 }, { 55, 90, 137 }
|
||||
}, { // Band 2
|
||||
{ 111, 197, 242 }, { 46, 158, 219 }, { 9, 104, 171 },
|
||||
{ 2, 65, 125 }, { 1, 44, 80 }, { 1, 17, 91 }
|
||||
}, { // Band 3
|
||||
{ 104, 208, 245 }, { 39, 168, 224 }, { 3, 109, 162 },
|
||||
{ 1, 79, 124 }, { 1, 50, 102 }, { 1, 43, 102 }
|
||||
}, { // Band 4
|
||||
{ 84, 220, 246 }, { 31, 177, 231 }, { 2, 115, 180 },
|
||||
{ 1, 79, 134 }, { 1, 55, 77 }, { 1, 60, 79 }
|
||||
}, { // Band 5
|
||||
{ 43, 243, 240 }, { 8, 180, 217 }, { 1, 115, 166 },
|
||||
{ 1, 84, 121 }, { 1, 51, 67 }, { 1, 16, 6 }
|
||||
}
|
||||
}
|
||||
}
|
||||
};
|
||||
#endif // CONFIG_TX64X64
|
||||
|
||||
static void extend_to_full_distribution(vp9_prob *probs, vp9_prob p) {
|
||||
vpx_memcpy(probs, vp9_pareto8_full[p = 0 ? 0 : p - 1],
|
||||
MODEL_NODES * sizeof(vp9_prob));
|
||||
@ -752,6 +1032,9 @@ void vp9_default_coef_probs(VP9_COMMON *cm) {
|
||||
vp9_copy(cm->fc.coef_probs[TX_8X8], default_coef_probs_8x8);
|
||||
vp9_copy(cm->fc.coef_probs[TX_16X16], default_coef_probs_16x16);
|
||||
vp9_copy(cm->fc.coef_probs[TX_32X32], default_coef_probs_32x32);
|
||||
#if CONFIG_TX64X64
|
||||
vp9_copy(cm->fc.coef_probs[TX_64X64], default_coef_probs_64x64);
|
||||
#endif
|
||||
}
|
||||
|
||||
#define COEF_COUNT_SAT 24
|
||||
@ -806,6 +1089,6 @@ void vp9_adapt_coef_probs(VP9_COMMON *cm) {
|
||||
update_factor = COEF_MAX_UPDATE_FACTOR;
|
||||
count_sat = COEF_COUNT_SAT;
|
||||
}
|
||||
for (t = TX_4X4; t <= TX_32X32; t++)
|
||||
for (t = TX_4X4; t < TX_SIZES; t++)
|
||||
adapt_coef_probs(cm, t, count_sat, update_factor);
|
||||
}
|
||||
|
@ -90,10 +90,20 @@ extern const vp9_extra_bit vp9_extra_bits_high10[ENTROPY_TOKENS];
|
||||
extern const vp9_extra_bit vp9_extra_bits_high12[ENTROPY_TOKENS];
|
||||
#endif // CONFIG_VP9_HIGHBITDEPTH
|
||||
|
||||
#if CONFIG_TX64X64
|
||||
#define DCT_MAX_VALUE 32768
|
||||
#else
|
||||
#define DCT_MAX_VALUE 16384
|
||||
#endif // CONFIG_TX64X64
|
||||
|
||||
#if CONFIG_VP9_HIGHBITDEPTH
|
||||
#if CONFIG_TX64X64
|
||||
#define DCT_MAX_VALUE_HIGH10 131072
|
||||
#define DCT_MAX_VALUE_HIGH12 524288
|
||||
#else
|
||||
#define DCT_MAX_VALUE_HIGH10 65536
|
||||
#define DCT_MAX_VALUE_HIGH12 262144
|
||||
#endif // CONFIG_TX64X64
|
||||
#endif // CONFIG_VP9_HIGHBITDEPTH
|
||||
|
||||
/* Coefficients are predicted via a 3-dimensional probability table. */
|
||||
@ -153,7 +163,14 @@ static INLINE void reset_skip_context(MACROBLOCKD *xd, BLOCK_SIZE bsize) {
|
||||
// This macro is currently unused but may be used by certain implementations
|
||||
#define MAXBAND_INDEX 21
|
||||
|
||||
DECLARE_ALIGNED(16, extern const uint8_t, vp9_coefband_trans_8x8plus[1024]);
|
||||
#if CONFIG_TX64X64
|
||||
#define MAX_NUM_COEFS 4096
|
||||
#else
|
||||
#define MAX_NUM_COEFS 1024
|
||||
#endif
|
||||
|
||||
DECLARE_ALIGNED(16, extern const uint8_t,
|
||||
vp9_coefband_trans_8x8plus[MAX_NUM_COEFS]);
|
||||
DECLARE_ALIGNED(16, extern const uint8_t, vp9_coefband_trans_4x4[16]);
|
||||
|
||||
static INLINE const uint8_t *get_band_translate(TX_SIZE tx_size) {
|
||||
@ -204,6 +221,12 @@ static INLINE int get_entropy_context(TX_SIZE tx_size, const ENTROPY_CONTEXT *a,
|
||||
above_ec = !!*(const uint64_t *)a;
|
||||
left_ec = !!*(const uint64_t *)l;
|
||||
break;
|
||||
#if CONFIG_TX64X64
|
||||
case TX_64X64:
|
||||
above_ec = !!*(const uint64_t *)a;
|
||||
left_ec = !!*(const uint64_t *)l;
|
||||
break;
|
||||
#endif
|
||||
default:
|
||||
assert(0 && "Invalid transform size.");
|
||||
break;
|
||||
|
@ -229,7 +229,7 @@ const vp9_tree_index vp9_intra_mode_tree[TREE_SIZE(INTRA_MODES)] = {
|
||||
-D135_PRED, -D117_PRED, /* 5 = D135_NODE */
|
||||
-D45_PRED, 14, /* 6 = D45_NODE */
|
||||
-D63_PRED, 16, /* 7 = D63_NODE */
|
||||
-D153_PRED, -D207_PRED /* 8 = D153_NODE */
|
||||
-D153_PRED, -D207_PRED /* 8 = D153_NODE */
|
||||
};
|
||||
|
||||
const vp9_tree_index vp9_inter_mode_tree[TREE_SIZE(INTER_MODES)] = {
|
||||
@ -265,6 +265,11 @@ static const vp9_prob default_single_ref_p[REF_CONTEXTS][2] = {
|
||||
};
|
||||
|
||||
static const struct tx_probs default_tx_probs = {
|
||||
#if CONFIG_TX64X64
|
||||
{ { 3, 3, 136, 37 },
|
||||
{ 3, 5, 52, 13 } },
|
||||
#endif
|
||||
|
||||
{ { 3, 136, 37 },
|
||||
{ 5, 52, 13 } },
|
||||
|
||||
@ -275,6 +280,26 @@ static const struct tx_probs default_tx_probs = {
|
||||
{ 66 } }
|
||||
};
|
||||
|
||||
#if CONFIG_TX64X64
|
||||
void tx_counts_to_branch_counts_64x64(const unsigned int *tx_count_64x64p,
|
||||
unsigned int (*ct_64x64p)[2]) {
|
||||
ct_64x64p[0][0] = tx_count_64x64p[TX_4X4];
|
||||
ct_64x64p[0][1] = tx_count_64x64p[TX_8X8] +
|
||||
tx_count_64x64p[TX_16X16] +
|
||||
tx_count_64x64p[TX_32X32] +
|
||||
tx_count_64x64p[TX_64X64];
|
||||
ct_64x64p[1][0] = tx_count_64x64p[TX_8X8];
|
||||
ct_64x64p[1][1] = tx_count_64x64p[TX_16X16] +
|
||||
tx_count_64x64p[TX_32X32] +
|
||||
tx_count_64x64p[TX_64X64];
|
||||
ct_64x64p[2][0] = tx_count_64x64p[TX_16X16];
|
||||
ct_64x64p[2][1] = tx_count_64x64p[TX_32X32] +
|
||||
tx_count_64x64p[TX_64X64];
|
||||
ct_64x64p[3][0] = tx_count_64x64p[TX_32X32];
|
||||
ct_64x64p[3][1] = tx_count_64x64p[TX_64X64];
|
||||
}
|
||||
#endif
|
||||
|
||||
void tx_counts_to_branch_counts_32x32(const unsigned int *tx_count_32x32p,
|
||||
unsigned int (*ct_32x32p)[2]) {
|
||||
ct_32x32p[0][0] = tx_count_32x32p[TX_4X4];
|
||||
@ -392,25 +417,34 @@ void vp9_adapt_mode_probs(VP9_COMMON *cm) {
|
||||
|
||||
if (cm->tx_mode == TX_MODE_SELECT) {
|
||||
int j;
|
||||
unsigned int branch_ct_8x8p[TX_SIZES - 3][2];
|
||||
unsigned int branch_ct_16x16p[TX_SIZES - 2][2];
|
||||
unsigned int branch_ct_32x32p[TX_SIZES - 1][2];
|
||||
unsigned int branch_ct_8x8p[1][2];
|
||||
unsigned int branch_ct_16x16p[2][2];
|
||||
unsigned int branch_ct_32x32p[3][2];
|
||||
#if CONFIG_TX64X64
|
||||
unsigned int branch_ct_64x64p[4][2];
|
||||
#endif
|
||||
|
||||
for (i = 0; i < TX_SIZE_CONTEXTS; ++i) {
|
||||
tx_counts_to_branch_counts_8x8(counts->tx.p8x8[i], branch_ct_8x8p);
|
||||
for (j = 0; j < TX_SIZES - 3; ++j)
|
||||
for (j = 0; j < 1; ++j)
|
||||
fc->tx_probs.p8x8[i][j] = adapt_prob(pre_fc->tx_probs.p8x8[i][j],
|
||||
branch_ct_8x8p[j]);
|
||||
|
||||
tx_counts_to_branch_counts_16x16(counts->tx.p16x16[i], branch_ct_16x16p);
|
||||
for (j = 0; j < TX_SIZES - 2; ++j)
|
||||
for (j = 0; j < 2; ++j)
|
||||
fc->tx_probs.p16x16[i][j] = adapt_prob(pre_fc->tx_probs.p16x16[i][j],
|
||||
branch_ct_16x16p[j]);
|
||||
|
||||
tx_counts_to_branch_counts_32x32(counts->tx.p32x32[i], branch_ct_32x32p);
|
||||
for (j = 0; j < TX_SIZES - 1; ++j)
|
||||
for (j = 0; j < 3; ++j)
|
||||
fc->tx_probs.p32x32[i][j] = adapt_prob(pre_fc->tx_probs.p32x32[i][j],
|
||||
branch_ct_32x32p[j]);
|
||||
#if CONFIG_TX64X64
|
||||
tx_counts_to_branch_counts_64x64(counts->tx.p64x64[i], branch_ct_64x64p);
|
||||
for (j = 0; j < 4; ++j)
|
||||
fc->tx_probs.p64x64[i][j] = adapt_prob(pre_fc->tx_probs.p64x64[i][j],
|
||||
branch_ct_64x64p[j]);
|
||||
#endif
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -24,15 +24,21 @@ extern "C" {
|
||||
struct VP9Common;
|
||||
|
||||
struct tx_probs {
|
||||
vp9_prob p32x32[TX_SIZE_CONTEXTS][TX_SIZES - 1];
|
||||
vp9_prob p16x16[TX_SIZE_CONTEXTS][TX_SIZES - 2];
|
||||
vp9_prob p8x8[TX_SIZE_CONTEXTS][TX_SIZES - 3];
|
||||
#if CONFIG_TX64X64
|
||||
vp9_prob p64x64[TX_SIZE_CONTEXTS][4];
|
||||
#endif
|
||||
vp9_prob p32x32[TX_SIZE_CONTEXTS][3];
|
||||
vp9_prob p16x16[TX_SIZE_CONTEXTS][2];
|
||||
vp9_prob p8x8[TX_SIZE_CONTEXTS][1];
|
||||
};
|
||||
|
||||
struct tx_counts {
|
||||
unsigned int p32x32[TX_SIZE_CONTEXTS][TX_SIZES];
|
||||
unsigned int p16x16[TX_SIZE_CONTEXTS][TX_SIZES - 1];
|
||||
unsigned int p8x8[TX_SIZE_CONTEXTS][TX_SIZES - 2];
|
||||
#if CONFIG_TX64X64
|
||||
unsigned int p64x64[TX_SIZE_CONTEXTS][5];
|
||||
#endif
|
||||
unsigned int p32x32[TX_SIZE_CONTEXTS][4];
|
||||
unsigned int p16x16[TX_SIZE_CONTEXTS][3];
|
||||
unsigned int p8x8[TX_SIZE_CONTEXTS][2];
|
||||
};
|
||||
|
||||
typedef struct frame_contexts {
|
||||
@ -88,6 +94,10 @@ void vp9_init_mode_probs(FRAME_CONTEXT *fc);
|
||||
|
||||
void vp9_adapt_mode_probs(struct VP9Common *cm);
|
||||
|
||||
#if CONFIG_TX64X64
|
||||
void tx_counts_to_branch_counts_64x64(const unsigned int *tx_count_64x64p,
|
||||
unsigned int (*ct_64x64p)[2]);
|
||||
#endif
|
||||
void tx_counts_to_branch_counts_32x32(const unsigned int *tx_count_32x32p,
|
||||
unsigned int (*ct_32x32p)[2]);
|
||||
void tx_counts_to_branch_counts_16x16(const unsigned int *tx_count_16x16p,
|
||||
|
@ -77,6 +77,9 @@ typedef enum {
|
||||
TX_8X8 = 1, // 8x8 transform
|
||||
TX_16X16 = 2, // 16x16 transform
|
||||
TX_32X32 = 3, // 32x32 transform
|
||||
#if CONFIG_TX64X64
|
||||
TX_64X64 = 4, // 64x64 transform
|
||||
#endif
|
||||
TX_SIZES
|
||||
} TX_SIZE;
|
||||
|
||||
@ -86,8 +89,11 @@ typedef enum {
|
||||
ALLOW_8X8 = 1, // allow block transform size up to 8x8
|
||||
ALLOW_16X16 = 2, // allow block transform size up to 16x16
|
||||
ALLOW_32X32 = 3, // allow block transform size up to 32x32
|
||||
TX_MODE_SELECT = 4, // transform specified for each block
|
||||
TX_MODES = 5,
|
||||
#if CONFIG_TX64X64
|
||||
ALLOW_64X64 = 4, // allow block transform size up to 32x32
|
||||
#endif
|
||||
TX_MODE_SELECT, // transform specified for each block
|
||||
TX_MODES,
|
||||
} TX_MODE;
|
||||
|
||||
typedef enum {
|
||||
|
@ -1457,6 +1457,458 @@ void vp9_iht16x16_add(TX_TYPE tx_type, const tran_low_t *input, uint8_t *dest,
|
||||
}
|
||||
}
|
||||
|
||||
#if CONFIG_TX64X64
|
||||
#define DownshiftMultiplyBy2(x) x * 2
|
||||
#define DownshiftMultiply(x) x
|
||||
|
||||
static void idct16f(double *input, double *output, int stride) {
|
||||
static const double C1 = 0.995184726672197;
|
||||
static const double C2 = 0.98078528040323;
|
||||
static const double C3 = 0.956940335732209;
|
||||
static const double C4 = 0.923879532511287;
|
||||
static const double C5 = 0.881921264348355;
|
||||
static const double C6 = 0.831469612302545;
|
||||
static const double C7 = 0.773010453362737;
|
||||
static const double C8 = 0.707106781186548;
|
||||
static const double C9 = 0.634393284163646;
|
||||
static const double C10 = 0.555570233019602;
|
||||
static const double C11 = 0.471396736825998;
|
||||
static const double C12 = 0.38268343236509;
|
||||
static const double C13 = 0.290284677254462;
|
||||
static const double C14 = 0.195090322016128;
|
||||
static const double C15 = 0.098017140329561;
|
||||
|
||||
double step[16];
|
||||
double intermediate[16];
|
||||
double temp1, temp2;
|
||||
|
||||
// step 1 and 2
|
||||
step[ 0] = input[stride*0] + input[stride*8];
|
||||
step[ 1] = input[stride*0] - input[stride*8];
|
||||
|
||||
temp1 = input[stride*4]*C12;
|
||||
temp2 = input[stride*12]*C4;
|
||||
|
||||
temp1 -= temp2;
|
||||
temp1 = DownshiftMultiply(temp1);
|
||||
temp1 *= C8;
|
||||
|
||||
step[ 2] = DownshiftMultiplyBy2(temp1);
|
||||
|
||||
temp1 = input[stride*4]*C4;
|
||||
temp2 = input[stride*12]*C12;
|
||||
temp1 += temp2;
|
||||
temp1 = DownshiftMultiply(temp1);
|
||||
temp1 *= C8;
|
||||
step[ 3] = DownshiftMultiplyBy2(temp1);
|
||||
|
||||
temp1 = input[stride*2]*C8;
|
||||
temp1 = DownshiftMultiplyBy2(temp1);
|
||||
temp2 = input[stride*6] + input[stride*10];
|
||||
|
||||
step[ 4] = temp1 + temp2;
|
||||
step[ 5] = temp1 - temp2;
|
||||
|
||||
temp1 = input[stride*14]*C8;
|
||||
temp1 = DownshiftMultiplyBy2(temp1);
|
||||
temp2 = input[stride*6] - input[stride*10];
|
||||
|
||||
step[ 6] = temp2 - temp1;
|
||||
step[ 7] = temp2 + temp1;
|
||||
|
||||
// for odd input
|
||||
temp1 = input[stride*3]*C12;
|
||||
temp2 = input[stride*13]*C4;
|
||||
temp1 += temp2;
|
||||
temp1 = DownshiftMultiply(temp1);
|
||||
temp1 *= C8;
|
||||
intermediate[ 8] = DownshiftMultiplyBy2(temp1);
|
||||
|
||||
temp1 = input[stride*3]*C4;
|
||||
temp2 = input[stride*13]*C12;
|
||||
temp2 -= temp1;
|
||||
temp2 = DownshiftMultiply(temp2);
|
||||
temp2 *= C8;
|
||||
intermediate[ 9] = DownshiftMultiplyBy2(temp2);
|
||||
|
||||
intermediate[10] = DownshiftMultiplyBy2(input[stride*9]*C8);
|
||||
intermediate[11] = input[stride*15] - input[stride*1];
|
||||
intermediate[12] = input[stride*15] + input[stride*1];
|
||||
intermediate[13] = DownshiftMultiplyBy2((input[stride*7]*C8));
|
||||
|
||||
temp1 = input[stride*11]*C12;
|
||||
temp2 = input[stride*5]*C4;
|
||||
temp2 -= temp1;
|
||||
temp2 = DownshiftMultiply(temp2);
|
||||
temp2 *= C8;
|
||||
intermediate[14] = DownshiftMultiplyBy2(temp2);
|
||||
|
||||
temp1 = input[stride*11]*C4;
|
||||
temp2 = input[stride*5]*C12;
|
||||
temp1 += temp2;
|
||||
temp1 = DownshiftMultiply(temp1);
|
||||
temp1 *= C8;
|
||||
intermediate[15] = DownshiftMultiplyBy2(temp1);
|
||||
|
||||
step[ 8] = intermediate[ 8] + intermediate[14];
|
||||
step[ 9] = intermediate[ 9] + intermediate[15];
|
||||
step[10] = intermediate[10] + intermediate[11];
|
||||
step[11] = intermediate[10] - intermediate[11];
|
||||
step[12] = intermediate[12] + intermediate[13];
|
||||
step[13] = intermediate[12] - intermediate[13];
|
||||
step[14] = intermediate[ 8] - intermediate[14];
|
||||
step[15] = intermediate[ 9] - intermediate[15];
|
||||
|
||||
// step 3
|
||||
output[stride*0] = step[ 0] + step[ 3];
|
||||
output[stride*1] = step[ 1] + step[ 2];
|
||||
output[stride*2] = step[ 1] - step[ 2];
|
||||
output[stride*3] = step[ 0] - step[ 3];
|
||||
|
||||
temp1 = step[ 4]*C14;
|
||||
temp2 = step[ 7]*C2;
|
||||
temp1 -= temp2;
|
||||
output[stride*4] = DownshiftMultiply(temp1);
|
||||
|
||||
temp1 = step[ 4]*C2;
|
||||
temp2 = step[ 7]*C14;
|
||||
temp1 += temp2;
|
||||
output[stride*7] = DownshiftMultiply(temp1);
|
||||
|
||||
temp1 = step[ 5]*C10;
|
||||
temp2 = step[ 6]*C6;
|
||||
temp1 -= temp2;
|
||||
output[stride*5] = DownshiftMultiply(temp1);
|
||||
|
||||
temp1 = step[ 5]*C6;
|
||||
temp2 = step[ 6]*C10;
|
||||
temp1 += temp2;
|
||||
output[stride*6] = DownshiftMultiply(temp1);
|
||||
|
||||
output[stride*8] = step[ 8] + step[11];
|
||||
output[stride*9] = step[ 9] + step[10];
|
||||
output[stride*10] = step[ 9] - step[10];
|
||||
output[stride*11] = step[ 8] - step[11];
|
||||
output[stride*12] = step[12] + step[15];
|
||||
output[stride*13] = step[13] + step[14];
|
||||
output[stride*14] = step[13] - step[14];
|
||||
output[stride*15] = step[12] - step[15];
|
||||
|
||||
// output 4
|
||||
step[ 0] = output[stride*0] + output[stride*7];
|
||||
step[ 1] = output[stride*1] + output[stride*6];
|
||||
step[ 2] = output[stride*2] + output[stride*5];
|
||||
step[ 3] = output[stride*3] + output[stride*4];
|
||||
step[ 4] = output[stride*3] - output[stride*4];
|
||||
step[ 5] = output[stride*2] - output[stride*5];
|
||||
step[ 6] = output[stride*1] - output[stride*6];
|
||||
step[ 7] = output[stride*0] - output[stride*7];
|
||||
|
||||
temp1 = output[stride*8]*C7;
|
||||
temp2 = output[stride*15]*C9;
|
||||
temp1 -= temp2;
|
||||
step[ 8] = DownshiftMultiply(temp1);
|
||||
|
||||
temp1 = output[stride*9]*C11;
|
||||
temp2 = output[stride*14]*C5;
|
||||
temp1 += temp2;
|
||||
step[ 9] = DownshiftMultiply(temp1);
|
||||
|
||||
temp1 = output[stride*10]*C3;
|
||||
temp2 = output[stride*13]*C13;
|
||||
temp1 -= temp2;
|
||||
step[10] = DownshiftMultiply(temp1);
|
||||
|
||||
temp1 = output[stride*11]*C15;
|
||||
temp2 = output[stride*12]*C1;
|
||||
temp1 += temp2;
|
||||
step[11] = DownshiftMultiply(temp1);
|
||||
|
||||
temp1 = output[stride*11]*C1;
|
||||
temp2 = output[stride*12]*C15;
|
||||
temp2 -= temp1;
|
||||
step[12] = DownshiftMultiply(temp2);
|
||||
|
||||
temp1 = output[stride*10]*C13;
|
||||
temp2 = output[stride*13]*C3;
|
||||
temp1 += temp2;
|
||||
step[13] = DownshiftMultiply(temp1);
|
||||
|
||||
temp1 = output[stride*9]*C5;
|
||||
temp2 = output[stride*14]*C11;
|
||||
temp2 -= temp1;
|
||||
step[14] = DownshiftMultiply(temp2);
|
||||
|
||||
temp1 = output[stride*8]*C9;
|
||||
temp2 = output[stride*15]*C7;
|
||||
temp1 += temp2;
|
||||
step[15] = DownshiftMultiply(temp1);
|
||||
|
||||
// step 5
|
||||
output[stride*0] = step[0] + step[15];
|
||||
output[stride*1] = step[1] + step[14];
|
||||
output[stride*2] = step[2] + step[13];
|
||||
output[stride*3] = step[3] + step[12];
|
||||
output[stride*4] = step[4] + step[11];
|
||||
output[stride*5] = step[5] + step[10];
|
||||
output[stride*6] = step[6] + step[ 9];
|
||||
output[stride*7] = step[7] + step[ 8];
|
||||
|
||||
output[stride*15] = step[0] - step[15];
|
||||
output[stride*14] = step[1] - step[14];
|
||||
output[stride*13] = step[2] - step[13];
|
||||
output[stride*12] = step[3] - step[12];
|
||||
output[stride*11] = step[4] - step[11];
|
||||
output[stride*10] = step[5] - step[10];
|
||||
output[stride*9] = step[6] - step[ 9];
|
||||
output[stride*8] = step[7] - step[ 8];
|
||||
}
|
||||
|
||||
// One 32-point inverse DCT pass (double precision reference path).
// Decomposes the 32-point problem into two 16-point IDCTs: the even input
// samples feed one directly; the odd samples are folded pairwise first and
// the fold is undone afterwards by dividing by 2*cos of the odd angles.
static void butterfly_32_idct_1d(double *input, double *output, int stride) {
  // cos(pi * k / 64) for odd k = 1, 3, ..., 31 (table index j -> k = 2j+1).
  static const double kCosOdd[16] = {
    0.998795456205,  // cos(pi *  1 / 64)
    0.989176509965,  // cos(pi *  3 / 64)
    0.970031253195,  // cos(pi *  5 / 64)
    0.941544065183,  // cos(pi *  7 / 64)
    0.903989293123,  // cos(pi *  9 / 64)
    0.857728610000,  // cos(pi * 11 / 64)
    0.803207531481,  // cos(pi * 13 / 64)
    0.740951125355,  // cos(pi * 15 / 64)
    0.671558954847,  // cos(pi * 17 / 64)
    0.595699304492,  // cos(pi * 19 / 64)
    0.514102744193,  // cos(pi * 21 / 64)
    0.427555093430,  // cos(pi * 23 / 64)
    0.336889853392,  // cos(pi * 25 / 64)
    0.242980179903,  // cos(pi * 27 / 64)
    0.146730474455,  // cos(pi * 29 / 64)
    0.049067674327,  // cos(pi * 31 / 64)
  };
  static const double kCos16 = 0.707106781187;  // cos(pi * 16 / 64)

  double half[32];   // recombined inputs for the two 16-point passes
  double merged[32]; // 16-point IDCT results, odd half still scaled
  int j;

  // Even input samples drive a plain 16-point inverse transform.
  for (j = 0; j < 16; ++j)
    half[j] = input[stride * (2 * j)];

  // Odd samples are folded pairwise; the very first odd sample is scaled by
  // cos(pi/4) instead of being paired.
  half[16] = DownshiftMultiplyBy2(input[stride * 1] * kCos16);
  for (j = 17; j < 32; ++j)
    half[j] = (input[stride * (2 * (j - 16) + 1)] +
               input[stride * (2 * (j - 16) - 1)]);

  idct16f(half, merged, 1);
  idct16f(half + 16, merged + 16, 1);

  // Undo the pairwise fold: divide by 2*cos((2k+1) * pi / 64).
  for (j = 16; j < 32; ++j)
    merged[j] = DownshiftMultiply(merged[j] / (2 * kCosOdd[j - 16]));

  // Final butterfly: sums fill the first half of the output, reversed
  // differences fill the second half.
  for (j = 0; j < 16; ++j)
    output[stride * j] = merged[j] + merged[16 + j];
  for (j = 0; j < 16; ++j)
    output[stride * (16 + j)] = merged[15 - j] - merged[31 - j];
}
|
||||
|
||||
// One 64-point inverse DCT pass (double precision reference path).
// Same even/odd split as butterfly_32_idct_1d, one level up: even samples
// feed a 32-point IDCT directly; odd samples are folded pairwise, run
// through their own 32-point IDCT, then unscaled by 2*cos of odd angles.
static void butterfly_64_idct_1d(double *input, double *output, int stride) {
  // C[k] = cos(k * pi / 128).
  static const double C[64] = {
    1.00000000000000000000,  // cos(0 * pi / 128)
    0.99969881869620424997,  // cos(1 * pi / 128)
    0.99879545620517240501,  // cos(2 * pi / 128)
    0.99729045667869020697,  // cos(3 * pi / 128)
    0.99518472667219692873,  // cos(4 * pi / 128)
    0.99247953459870996706,  // cos(5 * pi / 128)
    0.98917650996478101444,  // cos(6 * pi / 128)
    0.98527764238894122162,  // cos(7 * pi / 128)
    0.98078528040323043058,  // cos(8 * pi / 128)
    0.97570213003852857003,  // cos(9 * pi / 128)
    0.97003125319454397424,  // cos(10 * pi / 128)
    0.96377606579543984022,  // cos(11 * pi / 128)
    0.95694033573220882438,  // cos(12 * pi / 128)
    0.94952818059303667475,  // cos(13 * pi / 128)
    0.94154406518302080631,  // cos(14 * pi / 128)
    0.93299279883473895669,  // cos(15 * pi / 128)
    0.92387953251128673848,  // cos(16 * pi / 128)
    0.91420975570353069095,  // cos(17 * pi / 128)
    0.90398929312344333820,  // cos(18 * pi / 128)
    0.89322430119551532446,  // cos(19 * pi / 128)
    0.88192126434835504956,  // cos(20 * pi / 128)
    0.87008699110871146054,  // cos(21 * pi / 128)
    0.85772861000027211809,  // cos(22 * pi / 128)
    0.84485356524970711689,  // cos(23 * pi / 128)
    0.83146961230254523567,  // cos(24 * pi / 128)
    0.81758481315158371139,  // cos(25 * pi / 128)
    0.80320753148064494287,  // cos(26 * pi / 128)
    0.78834642762660633863,  // cos(27 * pi / 128)
    0.77301045336273699338,  // cos(28 * pi / 128)
    0.75720884650648456748,  // cos(29 * pi / 128)
    0.74095112535495921691,  // cos(30 * pi / 128)
    0.72424708295146700276,  // cos(31 * pi / 128)
    0.70710678118654757274,  // cos(32 * pi / 128)
    0.68954054473706694051,  // cos(33 * pi / 128)
    0.67155895484701844111,  // cos(34 * pi / 128)
    0.65317284295377686654,  // cos(35 * pi / 128)
    0.63439328416364559882,  // cos(36 * pi / 128)
    0.61523159058062693028,  // cos(37 * pi / 128)
    0.59569930449243346793,  // cos(38 * pi / 128)
    0.57580819141784544968,  // cos(39 * pi / 128)
    0.55557023301960228867,  // cos(40 * pi / 128)
    0.53499761988709737537,  // cos(41 * pi / 128)
    0.51410274419322177231,  // cos(42 * pi / 128)
    0.49289819222978414892,  // cos(43 * pi / 128)
    0.47139673682599780857,  // cos(44 * pi / 128)
    0.44961132965460659516,  // cos(45 * pi / 128)
    0.42755509343028219593,  // cos(46 * pi / 128)
    0.40524131400498980549,  // cos(47 * pi / 128)
    0.38268343236508983729,  // cos(48 * pi / 128)
    0.35989503653498827740,  // cos(49 * pi / 128)
    0.33688985339222005111,  // cos(50 * pi / 128)
    0.31368174039889151761,  // cos(51 * pi / 128)
    0.29028467725446227554,  // cos(52 * pi / 128)
    0.26671275747489842090,  // cos(53 * pi / 128)
    0.24298017990326398197,  // cos(54 * pi / 128)
    0.21910124015686976984,  // cos(55 * pi / 128)
    0.19509032201612830359,  // cos(56 * pi / 128)
    0.17096188876030135595,  // cos(57 * pi / 128)
    0.14673047445536174793,  // cos(58 * pi / 128)
    0.12241067519921627893,  // cos(59 * pi / 128)
    0.09801714032956077016,  // cos(60 * pi / 128)
    0.07356456359966745406,  // cos(61 * pi / 128)
    0.04906767432741813290,  // cos(62 * pi / 128)
    0.02454122852291226731,  // cos(63 * pi / 128)
  };

  double half[64];   // recombined inputs for the two 32-point passes
  double merged[64]; // 32-point IDCT results, odd half still scaled
  int i;

  // Even input samples drive a plain 32-point inverse transform.
  for (i = 0; i < 32; ++i)
    half[i] = input[stride * (2 * i)];

  // Odd samples are folded pairwise; the first odd sample is scaled by
  // cos(pi/4) (= C[32]) instead of being paired.
  half[32] = DownshiftMultiplyBy2(input[stride * 1] * C[32]);
  for (i = 33; i < 64; ++i)
    half[i] = (input[stride * (2 * (i - 32) + 1)] +
               input[stride * (2 * (i - 32) - 1)]);

  butterfly_32_idct_1d(half, merged, 1);
  butterfly_32_idct_1d(half + 32, merged + 32, 1);

  // Undo the pairwise fold: divide by 2*cos((2k+1) * pi / 128).
  for (i = 32; i < 64; ++i)
    merged[i] = DownshiftMultiply(merged[i] / (2 * C[(i - 32) * 2 + 1]));

  // Final butterfly: sums fill the first half of the output, reversed
  // differences fill the second half.
  for (i = 0; i < 32; ++i) {
    output[stride * i] = merged[i] + merged[32 + i];
    output[stride * (i + 32)] = merged[31 - i] - merged[63 - i];
  }
}
|
||||
|
||||
// Full 64x64 inverse transform (all 4096 coefficients), double precision
// reference implementation.  Applies the separable 1-D transform to every
// row, then to every column, then rounds the result (scaled down by 128)
// and adds it to the predictor in |dest|.
void vp9_idct64x64_4096_add_c(const tran_low_t *input, uint8_t *dest,
                              int stride) {
  // vp9_clear_system_state(); // Make it simd safe : __asm emms;
  {
    double row_pass[64 * 64], col_pass[64 * 64];
    int r, c;

    // Pass 1: transform each row of coefficients.
    for (r = 0; r < 64; ++r) {
      double in[64], out[64];
      for (c = 0; c < 64; ++c)
        in[c] = input[c + r * 64];
      butterfly_64_idct_1d(in, out, 1);
      for (c = 0; c < 64; ++c)
        row_pass[c + r * 64] = out[c];
    }

    // Pass 2: transform each column of the row-pass result.
    for (c = 0; c < 64; ++c) {
      double in[64], out[64];
      for (r = 0; r < 64; ++r)
        in[r] = row_pass[r * 64 + c];
      butterfly_64_idct_1d(in, out, 1);
      for (r = 0; r < 64; ++r)
        col_pass[r * 64 + c] = out[r];
    }

    // Round (dividing out the transform gain of 128) and accumulate into
    // the predictor, one destination row at a time.
    for (r = 0; r < 64; ++r) {
      for (c = 0; c < 64; ++c)
        dest[c] = clip_pixel_add(dest[c], round(col_pass[r * 64 + c] / 128));
      dest += stride;
    }
  }
  // vp9_clear_system_state(); // Make it simd safe : __asm emms;
}
|
||||
|
||||
// Dispatch wrapper for the 64x64 inverse transform.  The eob-based
// reduced-coefficient shortcuts used by the smaller transforms are not
// implemented for 64x64 yet, so the full 4096-coefficient path always runs.
void vp9_idct64x64_add(const tran_low_t *input, uint8_t *dest,
                       int stride, int eob) {
  (void) eob;  // unused until a partial-IDCT fast path exists
  vp9_idct64x64_4096_add_c(input, dest, stride);
}
|
||||
#endif
|
||||
|
||||
#if CONFIG_VP9_HIGHBITDEPTH
|
||||
void vp9_highbd_iwht4x4_16_add_c(const tran_low_t *input, uint8_t *dest8,
|
||||
int stride, int bd) {
|
||||
@ -2899,4 +3351,47 @@ void vp9_highbd_iht16x16_add(TX_TYPE tx_type, const tran_low_t *input,
|
||||
vp9_highbd_iht16x16_256_add(input, dest, stride, tx_type, bd);
|
||||
}
|
||||
}
|
||||
|
||||
#if CONFIG_TX64X64
|
||||
// High-bitdepth full 64x64 inverse transform (all 4096 coefficients),
// double precision reference implementation.  Mirrors
// vp9_idct64x64_4096_add_c but clips to the [0, 2^bd - 1] pixel range.
//
// Fixes vs. the original:
//  * The reconstruction loop combined `dest += stride` per row with an
//    absolute `dest[j * stride + i]` write, applying the row offset twice,
//    and it read the predictor from `dest[j * 64 + i]` (transform width
//    confused with the destination stride).  It now matches the
//    low-bitdepth loop: index `dest[i]` relative to the advancing row
//    pointer.
//  * High-bitdepth frame buffers are uint16_t behind a uint8_t facade
//    (see vp9_highbd_iwht4x4_16_add_c above, which takes `dest8` and
//    converts); the pixels are now accessed through CONVERT_TO_SHORTPTR.
//    NOTE(review): conversion inferred from the file's other highbd
//    functions — confirm against callers.
void vp9_highbd_idct64x64_4096_add_c(const tran_low_t *input, uint8_t *dest,
                                     int stride, int bd) {
  // vp9_clear_system_state(); // Make it simd safe : __asm emms;
  {
    uint16_t *dst = CONVERT_TO_SHORTPTR(dest);
    double out[64 * 64], out2[64 * 64];
    int i, j;

    // First transform rows.
    for (i = 0; i < 64; ++i) {
      double temp_in[64], temp_out[64];
      for (j = 0; j < 64; ++j)
        temp_in[j] = input[j + i * 64];
      butterfly_64_idct_1d(temp_in, temp_out, 1);
      for (j = 0; j < 64; ++j)
        out[j + i * 64] = temp_out[j];
    }

    // Then transform columns.
    for (i = 0; i < 64; ++i) {
      double temp_in[64], temp_out[64];
      for (j = 0; j < 64; ++j)
        temp_in[j] = out[j * 64 + i];
      butterfly_64_idct_1d(temp_in, temp_out, 1);
      for (j = 0; j < 64; ++j)
        out2[j * 64 + i] = temp_out[j];
    }

    // Round (dividing out the transform gain of 128) and accumulate into
    // the predictor, one destination row at a time.
    for (j = 0; j < 64; ++j) {
      for (i = 0; i < 64; ++i)
        dst[i] = highbd_clip_pixel_add(dst[i],
                                       round(out2[j * 64 + i] / 128), bd);
      dst += stride;
    }
  }
  // vp9_clear_system_state(); // Make it simd safe : __asm emms;
}
|
||||
|
||||
// High-bitdepth dispatch wrapper for the 64x64 inverse transform.  No
// eob-based partial-IDCT shortcut exists for 64x64 yet, so the full
// 4096-coefficient path always runs.
void vp9_highbd_idct64x64_add(const tran_low_t *input, uint8_t *dest,
                              int stride, int eob, int bd) {
  (void) eob;  // unused until a partial-IDCT fast path exists
  vp9_highbd_idct64x64_4096_add_c(input, dest, stride, bd);
}
|
||||
#endif // CONFIG_TX64X64
|
||||
#endif // CONFIG_VP9_HIGHBITDEPTH
|
||||
|
@ -122,11 +122,14 @@ void vp9_idct4x4_add(const tran_low_t *input, uint8_t *dest, int stride,
|
||||
int eob);
|
||||
void vp9_idct8x8_add(const tran_low_t *input, uint8_t *dest, int stride,
|
||||
int eob);
|
||||
void vp9_idct16x16_add(const tran_low_t *input, uint8_t *dest, int stride, int
|
||||
eob);
|
||||
void vp9_idct16x16_add(const tran_low_t *input, uint8_t *dest, int stride,
|
||||
int eob);
|
||||
void vp9_idct32x32_add(const tran_low_t *input, uint8_t *dest, int stride,
|
||||
int eob);
|
||||
|
||||
#if CONFIG_TX64X64
|
||||
void vp9_idct64x64_add(const tran_low_t *input, uint8_t *dest, int stride,
|
||||
int eob);
|
||||
#endif
|
||||
void vp9_iht4x4_add(TX_TYPE tx_type, const tran_low_t *input, uint8_t *dest,
|
||||
int stride, int eob);
|
||||
void vp9_iht8x8_add(TX_TYPE tx_type, const tran_low_t *input, uint8_t *dest,
|
||||
@ -145,6 +148,10 @@ void vp9_highbd_idct16x16_add(const tran_low_t *input, uint8_t *dest,
|
||||
int stride, int eob, int bd);
|
||||
void vp9_highbd_idct32x32_add(const tran_low_t *input, uint8_t *dest,
|
||||
int stride, int eob, int bd);
|
||||
#if CONFIG_TX64X64
|
||||
void vp9_highbd_idct64x64_add(const tran_low_t *input, uint8_t *dest,
|
||||
int stride, int eob, int bd);
|
||||
#endif
|
||||
void vp9_highbd_iht4x4_add(TX_TYPE tx_type, const tran_low_t *input,
|
||||
uint8_t *dest, int stride, int eob, int bd);
|
||||
void vp9_highbd_iht8x8_add(TX_TYPE tx_type, const tran_low_t *input,
|
||||
|
@ -38,6 +38,9 @@ static const uint64_t left_64x64_txform_mask[TX_SIZES]= {
|
||||
0xffffffffffffffff, // TX_8x8
|
||||
0x5555555555555555, // TX_16x16
|
||||
0x1111111111111111, // TX_32x32
|
||||
#if CONFIG_TX64X64
|
||||
0x0101010101010101, // TX_64x64
|
||||
#endif
|
||||
};
|
||||
|
||||
// 64 bit masks for above transform size. Each 1 represents a position where
|
||||
@ -62,6 +65,9 @@ static const uint64_t above_64x64_txform_mask[TX_SIZES]= {
|
||||
0xffffffffffffffff, // TX_8x8
|
||||
0x00ff00ff00ff00ff, // TX_16x16
|
||||
0x000000ff000000ff, // TX_32x32
|
||||
#if CONFIG_TX64X64
|
||||
0x00000000000000ff, // TX_64x64
|
||||
#endif
|
||||
};
|
||||
|
||||
// 64 bit masks for prediction sizes (left). Each 1 represents a position
|
||||
@ -140,6 +146,9 @@ static const uint16_t left_64x64_txform_mask_uv[TX_SIZES]= {
|
||||
0xffff, // TX_8x8
|
||||
0x5555, // TX_16x16
|
||||
0x1111, // TX_32x32
|
||||
#if CONFIG_TX64X64
|
||||
0x0101, // TX_64x64, never used
|
||||
#endif
|
||||
};
|
||||
|
||||
static const uint16_t above_64x64_txform_mask_uv[TX_SIZES]= {
|
||||
@ -147,6 +156,9 @@ static const uint16_t above_64x64_txform_mask_uv[TX_SIZES]= {
|
||||
0xffff, // TX_8x8
|
||||
0x0f0f, // TX_16x16
|
||||
0x000f, // TX_32x32
|
||||
#if CONFIG_TX64X64
|
||||
0x0003, // TX_64x64, never used
|
||||
#endif
|
||||
};
|
||||
|
||||
// 16 bit left mask to shift and set for each uv prediction size.
|
||||
|
@ -107,6 +107,10 @@ static INLINE const vp9_prob *get_tx_probs(TX_SIZE max_tx_size, int ctx,
|
||||
return tx_probs->p16x16[ctx];
|
||||
case TX_32X32:
|
||||
return tx_probs->p32x32[ctx];
|
||||
#if CONFIG_TX64X64
|
||||
case TX_64X64:
|
||||
return tx_probs->p64x64[ctx];
|
||||
#endif
|
||||
default:
|
||||
assert(0 && "Invalid max_tx_size.");
|
||||
return NULL;
|
||||
@ -128,6 +132,10 @@ static INLINE unsigned int *get_tx_counts(TX_SIZE max_tx_size, int ctx,
|
||||
return tx_counts->p16x16[ctx];
|
||||
case TX_32X32:
|
||||
return tx_counts->p32x32[ctx];
|
||||
#if CONFIG_TX64X64
|
||||
case TX_64X64:
|
||||
return tx_counts->p64x64[ctx];
|
||||
#endif
|
||||
default:
|
||||
assert(0 && "Invalid max_tx_size.");
|
||||
return NULL;
|
||||
|
@ -47,7 +47,34 @@ const TX_TYPE intra_mode_to_tx_type_lookup[INTRA_MODES] = {
|
||||
const uint16_t *left, int bd) { \
|
||||
highbd_##type##_predictor(dst, stride, size, above, left, bd); \
|
||||
}
|
||||
#endif // CONFIG_VP9_HIGHBITDEPTH
|
||||
|
||||
#if CONFIG_TX64X64
|
||||
|
||||
#if CONFIG_VP9_HIGHBITDEPTH
|
||||
#define intra_pred_allsizes(type) \
|
||||
intra_pred_sized(type, 4) \
|
||||
intra_pred_sized(type, 8) \
|
||||
intra_pred_sized(type, 16) \
|
||||
intra_pred_sized(type, 32) \
|
||||
intra_pred_sized(type, 64) \
|
||||
intra_pred_highbd_sized(type, 4) \
|
||||
intra_pred_highbd_sized(type, 8) \
|
||||
intra_pred_highbd_sized(type, 16) \
|
||||
intra_pred_highbd_sized(type, 32) \
|
||||
intra_pred_highbd_sized(type, 64)
|
||||
#else
|
||||
#define intra_pred_allsizes(type) \
|
||||
intra_pred_sized(type, 4) \
|
||||
intra_pred_sized(type, 8) \
|
||||
intra_pred_sized(type, 16) \
|
||||
intra_pred_sized(type, 32) \
|
||||
intra_pred_sized(type, 64)
|
||||
#endif // CONFIG_VP9_HIGHBITDEPTH
|
||||
|
||||
#else // CONFIG_TX64X64
|
||||
|
||||
#if CONFIG_VP9_HIGHBITDEPTH
|
||||
#define intra_pred_allsizes(type) \
|
||||
intra_pred_sized(type, 4) \
|
||||
intra_pred_sized(type, 8) \
|
||||
@ -57,9 +84,7 @@ const TX_TYPE intra_mode_to_tx_type_lookup[INTRA_MODES] = {
|
||||
intra_pred_highbd_sized(type, 8) \
|
||||
intra_pred_highbd_sized(type, 16) \
|
||||
intra_pred_highbd_sized(type, 32)
|
||||
|
||||
#else
|
||||
|
||||
#define intra_pred_allsizes(type) \
|
||||
intra_pred_sized(type, 4) \
|
||||
intra_pred_sized(type, 8) \
|
||||
@ -67,6 +92,8 @@ const TX_TYPE intra_mode_to_tx_type_lookup[INTRA_MODES] = {
|
||||
intra_pred_sized(type, 32)
|
||||
#endif // CONFIG_VP9_HIGHBITDEPTH
|
||||
|
||||
#endif // CONFIG_TX64X64
|
||||
|
||||
#if CONFIG_VP9_HIGHBITDEPTH
|
||||
static INLINE void highbd_d207_predictor(uint16_t *dst, ptrdiff_t stride,
|
||||
int bs, const uint16_t *above,
|
||||
@ -575,16 +602,25 @@ static intra_pred_fn dc_pred[2][2][TX_SIZES];
|
||||
typedef void (*intra_high_pred_fn)(uint16_t *dst, ptrdiff_t stride,
|
||||
const uint16_t *above, const uint16_t *left,
|
||||
int bd);
|
||||
static intra_high_pred_fn pred_high[INTRA_MODES][4];
|
||||
static intra_high_pred_fn dc_pred_high[2][2][4];
|
||||
static intra_high_pred_fn pred_high[INTRA_MODES][TX_SIZES];
|
||||
static intra_high_pred_fn dc_pred_high[2][2][TX_SIZES];
|
||||
#endif // CONFIG_VP9_HIGHBITDEPTH
|
||||
|
||||
void vp9_init_intra_predictors() {
|
||||
#if CONFIG_TX64X64
|
||||
#define INIT_ALL_SIZES(p, type) \
|
||||
p[TX_4X4] = vp9_##type##_predictor_4x4; \
|
||||
p[TX_8X8] = vp9_##type##_predictor_8x8; \
|
||||
p[TX_16X16] = vp9_##type##_predictor_16x16; \
|
||||
p[TX_32X32] = vp9_##type##_predictor_32x32; \
|
||||
p[TX_64X64] = vp9_##type##_predictor_64x64
|
||||
#else
|
||||
#define INIT_ALL_SIZES(p, type) \
|
||||
p[TX_4X4] = vp9_##type##_predictor_4x4; \
|
||||
p[TX_8X8] = vp9_##type##_predictor_8x8; \
|
||||
p[TX_16X16] = vp9_##type##_predictor_16x16; \
|
||||
p[TX_32X32] = vp9_##type##_predictor_32x32
|
||||
#endif
|
||||
|
||||
INIT_ALL_SIZES(pred[V_PRED], v);
|
||||
INIT_ALL_SIZES(pred[H_PRED], h);
|
||||
@ -638,7 +674,11 @@ static void build_intra_predictors_high(const MACROBLOCKD *xd,
|
||||
uint16_t *dst = CONVERT_TO_SHORTPTR(dst8);
|
||||
uint16_t *ref = CONVERT_TO_SHORTPTR(ref8);
|
||||
DECLARE_ALIGNED_ARRAY(16, uint16_t, left_col, 64);
|
||||
#if CONFIG_TX64X64
|
||||
DECLARE_ALIGNED_ARRAY(16, uint16_t, above_data, 256 + 16);
|
||||
#else
|
||||
DECLARE_ALIGNED_ARRAY(16, uint16_t, above_data, 128 + 16);
|
||||
#endif
|
||||
uint16_t *above_row = above_data + 16;
|
||||
const uint16_t *const_above_row = above_row;
|
||||
const int bs = 4 << tx_size;
|
||||
@ -767,7 +807,11 @@ static void build_intra_predictors(const MACROBLOCKD *xd, const uint8_t *ref,
|
||||
int plane) {
|
||||
int i;
|
||||
DECLARE_ALIGNED_ARRAY(16, uint8_t, left_col, 64);
|
||||
#if CONFIG_TX64X64
|
||||
DECLARE_ALIGNED_ARRAY(16, uint8_t, above_data, 256 + 16);
|
||||
#else
|
||||
DECLARE_ALIGNED_ARRAY(16, uint8_t, above_data, 128 + 16);
|
||||
#endif
|
||||
uint8_t *above_row = above_data + 16;
|
||||
const uint8_t *const_above_row = above_row;
|
||||
const int bs = 4 << tx_size;
|
||||
|
@ -224,6 +224,47 @@ specialize qw/vp9_dc_left_predictor_32x32/;
|
||||
add_proto qw/void vp9_dc_128_predictor_32x32/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
|
||||
specialize qw/vp9_dc_128_predictor_32x32/;
|
||||
|
||||
if (vpx_config("CONFIG_TX64X64") eq "yes") {
|
||||
add_proto qw/void vp9_d207_predictor_64x64/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
|
||||
specialize qw/vp9_d207_predictor_64x64/;
|
||||
|
||||
add_proto qw/void vp9_d45_predictor_64x64/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
|
||||
specialize qw/vp9_d45_predictor_64x64/;
|
||||
|
||||
add_proto qw/void vp9_d63_predictor_64x64/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
|
||||
specialize qw/vp9_d63_predictor_64x64/;
|
||||
|
||||
add_proto qw/void vp9_h_predictor_64x64/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
|
||||
specialize qw/vp9_h_predictor_64x64/;
|
||||
|
||||
add_proto qw/void vp9_d117_predictor_64x64/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
|
||||
specialize qw/vp9_d117_predictor_64x64/;
|
||||
|
||||
add_proto qw/void vp9_d135_predictor_64x64/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
|
||||
specialize qw/vp9_d135_predictor_64x64/;
|
||||
|
||||
add_proto qw/void vp9_d153_predictor_64x64/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
|
||||
specialize qw/vp9_d153_predictor_64x64/;
|
||||
|
||||
add_proto qw/void vp9_v_predictor_64x64/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
|
||||
specialize qw/vp9_v_predictor_64x64/;
|
||||
|
||||
add_proto qw/void vp9_tm_predictor_64x64/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
|
||||
specialize qw/vp9_tm_predictor_64x64/;
|
||||
|
||||
add_proto qw/void vp9_dc_predictor_64x64/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
|
||||
specialize qw/vp9_dc_predictor_64x64/;
|
||||
|
||||
add_proto qw/void vp9_dc_top_predictor_64x64/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
|
||||
specialize qw/vp9_dc_top_predictor_64x64/;
|
||||
|
||||
add_proto qw/void vp9_dc_left_predictor_64x64/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
|
||||
specialize qw/vp9_dc_left_predictor_64x64/;
|
||||
|
||||
add_proto qw/void vp9_dc_128_predictor_64x64/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
|
||||
specialize qw/vp9_dc_128_predictor_64x64/;
|
||||
}
|
||||
|
||||
#
|
||||
# Loopfilter
|
||||
#
|
||||
@ -366,6 +407,11 @@ if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") {
|
||||
add_proto qw/void vp9_idct32x32_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
|
||||
specialize qw/vp9_idct32x32_1_add/;
|
||||
|
||||
if (vpx_config("CONFIG_TX64X64") eq "yes") {
|
||||
add_proto qw/void vp9_idct64x64_4096_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
|
||||
specialize qw/vp9_idct64x64_4096_add/;
|
||||
}
|
||||
|
||||
add_proto qw/void vp9_iht4x4_16_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int tx_type";
|
||||
specialize qw/vp9_iht4x4_16_add/;
|
||||
|
||||
@ -419,6 +465,11 @@ if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") {
|
||||
add_proto qw/void vp9_idct32x32_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
|
||||
specialize qw/vp9_idct32x32_1_add/;
|
||||
|
||||
if (vpx_config("CONFIG_TX64X64") eq "yes") {
|
||||
add_proto qw/void vp9_idct64x64_4096_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
|
||||
specialize qw/vp9_idct64x64_4096_add/;
|
||||
}
|
||||
|
||||
add_proto qw/void vp9_iht4x4_16_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int tx_type";
|
||||
specialize qw/vp9_iht4x4_16_add/;
|
||||
|
||||
@ -480,6 +531,11 @@ if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") {
|
||||
specialize qw/vp9_idct32x32_1_add sse2 neon_asm dspr2/;
|
||||
$vp9_idct32x32_1_add_neon_asm=vp9_idct32x32_1_add_neon;
|
||||
|
||||
if (vpx_config("CONFIG_TX64X64") eq "yes") {
|
||||
add_proto qw/void vp9_idct64x64_4096_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
|
||||
specialize qw/vp9_idct64x64_4096_add/;
|
||||
}
|
||||
|
||||
add_proto qw/void vp9_iht4x4_16_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int tx_type";
|
||||
specialize qw/vp9_iht4x4_16_add sse2 neon_asm dspr2/;
|
||||
$vp9_iht4x4_16_add_neon_asm=vp9_iht4x4_16_add_neon;
|
||||
@ -662,6 +718,46 @@ if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") {
|
||||
add_proto qw/void vp9_highbd_dc_128_predictor_32x32/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
|
||||
specialize qw/vp9_highbd_dc_128_predictor_32x32/;
|
||||
|
||||
if (vpx_config("CONFIG_TX64X64") eq "yes") {
|
||||
add_proto qw/void vp9_highbd_d207_predictor_64x64/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
|
||||
specialize qw/vp9_highbd_d207_predictor_64x64/;
|
||||
|
||||
add_proto qw/void vp9_highbd_d45_predictor_64x64/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
|
||||
specialize qw/vp9_highbd_d45_predictor_64x64/;
|
||||
|
||||
add_proto qw/void vp9_highbd_d63_predictor_64x64/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
|
||||
specialize qw/vp9_highbd_d63_predictor_64x64/;
|
||||
|
||||
add_proto qw/void vp9_highbd_h_predictor_64x64/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
|
||||
specialize qw/vp9_highbd_h_predictor_64x64/;
|
||||
|
||||
add_proto qw/void vp9_highbd_d117_predictor_64x64/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
|
||||
specialize qw/vp9_highbd_d117_predictor_64x64/;
|
||||
|
||||
add_proto qw/void vp9_highbd_d135_predictor_64x64/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
|
||||
specialize qw/vp9_highbd_d135_predictor_64x64/;
|
||||
|
||||
add_proto qw/void vp9_highbd_d153_predictor_64x64/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
|
||||
specialize qw/vp9_highbd_d153_predictor_64x64/;
|
||||
|
||||
add_proto qw/void vp9_highbd_v_predictor_64x64/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
|
||||
specialize qw/vp9_highbd_v_predictor_64x64/;
|
||||
|
||||
add_proto qw/void vp9_highbd_tm_predictor_64x64/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
|
||||
specialize qw/vp9_highbd_tm_predictor_64x64/;
|
||||
|
||||
add_proto qw/void vp9_highbd_dc_predictor_64x64/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
|
||||
specialize qw/vp9_highbd_dc_predictor_64x64/;
|
||||
|
||||
add_proto qw/void vp9_highbd_dc_top_predictor_64x64/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
|
||||
specialize qw/vp9_highbd_dc_top_predictor_64x64/;
|
||||
|
||||
add_proto qw/void vp9_highbd_dc_left_predictor_64x64/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
|
||||
specialize qw/vp9_highbd_dc_left_predictor_64x64/;
|
||||
|
||||
add_proto qw/void vp9_highbd_dc_128_predictor_64x64/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
|
||||
specialize qw/vp9_highbd_dc_128_predictor_64x64/;
|
||||
}
|
||||
#
|
||||
# Sub Pixel Filters
|
||||
#
|
||||
@ -774,6 +870,11 @@ if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") {
|
||||
add_proto qw/void vp9_highbd_idct32x32_1024_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int bd";
|
||||
specialize qw/vp9_highbd_idct32x32_1024_add/;
|
||||
|
||||
if (vpx_config("CONFIG_TX64X64") eq "yes") {
|
||||
add_proto qw/void vp9_highbd_idct64x64_4096_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int bd";
|
||||
specialize qw/vp9_highbd_idct64x64_4096_add/;
|
||||
}
|
||||
|
||||
add_proto qw/void vp9_highbd_idct32x32_34_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int bd";
|
||||
specialize qw/vp9_highbd_idct32x32_34_add/;
|
||||
|
||||
@ -1144,6 +1245,14 @@ if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") {
|
||||
|
||||
add_proto qw/void vp9_quantize_b_32x32/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, int zbin_oq_value, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan";
|
||||
specialize qw/vp9_quantize_b_32x32/;
|
||||
|
||||
if (vpx_config("CONFIG_TX64X64") eq "yes") {
|
||||
add_proto qw/void vp9_quantize_fp_64x64/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, int zbin_oq_value, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan";
|
||||
specialize qw/vp9_quantize_fp_64x64/;
|
||||
|
||||
add_proto qw/void vp9_quantize_b_64x64/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, int zbin_oq_value, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan";
|
||||
specialize qw/vp9_quantize_b_64x64/;
|
||||
}
|
||||
} else {
|
||||
add_proto qw/int64_t vp9_block_error/, "const tran_low_t *coeff, const tran_low_t *dqcoeff, intptr_t block_size, int64_t *ssz";
|
||||
specialize qw/vp9_block_error avx2/, "$sse2_x86inc";
|
||||
@ -1159,6 +1268,14 @@ if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") {
|
||||
|
||||
add_proto qw/void vp9_quantize_b_32x32/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, int zbin_oq_value, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan";
|
||||
specialize qw/vp9_quantize_b_32x32/, "$ssse3_x86_64";
|
||||
|
||||
if (vpx_config("CONFIG_TX64X64") eq "yes") {
|
||||
add_proto qw/void vp9_quantize_fp_64x64/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, int zbin_oq_value, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan";
|
||||
specialize qw/vp9_quantize_fp_64x64/;
|
||||
|
||||
add_proto qw/void vp9_quantize_b_64x64/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, int zbin_oq_value, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan";
|
||||
specialize qw/vp9_quantize_b_64x64/;
|
||||
}
|
||||
}
|
||||
|
||||
#
|
||||
@ -1213,6 +1330,14 @@ if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") {
|
||||
|
||||
add_proto qw/void vp9_fdct32x32_rd/, "const int16_t *input, tran_low_t *output, int stride";
|
||||
specialize qw/vp9_fdct32x32_rd/;
|
||||
|
||||
if (vpx_config("CONFIG_TX64X64") eq "yes") {
|
||||
add_proto qw/void vp9_fdct64x64_1/, "const int16_t *input, tran_low_t *output, int stride";
|
||||
specialize qw/vp9_fdct64x64_1/;
|
||||
|
||||
add_proto qw/void vp9_fdct64x64/, "const int16_t *input, tran_low_t *output, int stride";
|
||||
specialize qw/vp9_fdct64x64/;
|
||||
}
|
||||
} else {
|
||||
add_proto qw/void vp9_fht4x4/, "const int16_t *input, tran_low_t *output, int stride, int tx_type";
|
||||
specialize qw/vp9_fht4x4 sse2/;
|
||||
@ -1252,6 +1377,14 @@ if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") {
|
||||
|
||||
add_proto qw/void vp9_fdct32x32_rd/, "const int16_t *input, tran_low_t *output, int stride";
|
||||
specialize qw/vp9_fdct32x32_rd sse2 avx2/;
|
||||
|
||||
if (vpx_config("CONFIG_TX64X64") eq "yes") {
|
||||
add_proto qw/void vp9_fdct64x64_1/, "const int16_t *input, tran_low_t *output, int stride";
|
||||
specialize qw/vp9_fdct64x64_1/;
|
||||
|
||||
add_proto qw/void vp9_fdct64x64/, "const int16_t *input, tran_low_t *output, int stride";
|
||||
specialize qw/vp9_fdct64x64/;
|
||||
}
|
||||
}
|
||||
|
||||
#
|
||||
@ -1868,6 +2001,14 @@ if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") {
|
||||
add_proto qw/void vp9_highbd_quantize_b_32x32/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, int zbin_oq_value, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan";
|
||||
specialize qw/vp9_highbd_quantize_b_32x32/;
|
||||
|
||||
if (vpx_config("CONFIG_TX64X64") eq "yes") {
|
||||
add_proto qw/void vp9_highbd_quantize_fp_64x64/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, int zbin_oq_value, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan";
|
||||
specialize qw/vp9_highbd_quantize_fp_64x64/;
|
||||
|
||||
add_proto qw/void vp9_highbd_quantize_b_64x64/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, int zbin_oq_value, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan";
|
||||
specialize qw/vp9_highbd_quantize_b_64x64/;
|
||||
}
|
||||
|
||||
#
|
||||
# Structured Similarity (SSIM)
|
||||
#
|
||||
@ -1913,6 +2054,14 @@ if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") {
|
||||
add_proto qw/void vp9_highbd_fdct32x32_rd/, "const int16_t *input, tran_low_t *output, int stride";
|
||||
specialize qw/vp9_highbd_fdct32x32_rd/;
|
||||
|
||||
if (vpx_config("CONFIG_TX64X64") eq "yes") {
|
||||
add_proto qw/void vp9_highbd_fdct64x64_1/, "const int16_t *input, tran_low_t *output, int stride";
|
||||
specialize qw/vp9_highbd_fdct64x64_1/;
|
||||
|
||||
add_proto qw/void vp9_highbd_fdct64x64/, "const int16_t *input, tran_low_t *output, int stride";
|
||||
specialize qw/vp9_highbd_fdct64x64/;
|
||||
}
|
||||
|
||||
add_proto qw/void vp9_highbd_temporal_filter_apply/, "uint8_t *frame1, unsigned int stride, uint8_t *frame2, unsigned int block_width, unsigned int block_height, int strength, int filter_weight, unsigned int *accumulator, uint16_t *count";
|
||||
specialize qw/vp9_highbd_temporal_filter_apply/;
|
||||
|
||||
|
File diff suppressed because it is too large
Load Diff
@ -80,8 +80,15 @@ static int decode_unsigned_max(struct vp9_read_bit_buffer *rb, int max) {
|
||||
|
||||
static TX_MODE read_tx_mode(vp9_reader *r) {
|
||||
TX_MODE tx_mode = vp9_read_literal(r, 2);
|
||||
#if CONFIG_TX64X64
|
||||
if (tx_mode == 2)
|
||||
tx_mode += vp9_read_bit(r); // ALLOW_16X16 and ALLOW_32X32
|
||||
else if (tx_mode == 3)
|
||||
tx_mode += 1 + vp9_read_bit(r); // ALLOW_64X64 and TX_MODE_SELECT
|
||||
#else
|
||||
if (tx_mode == ALLOW_32X32)
|
||||
tx_mode += vp9_read_bit(r);
|
||||
#endif
|
||||
return tx_mode;
|
||||
}
|
||||
|
||||
@ -89,16 +96,22 @@ static void read_tx_mode_probs(struct tx_probs *tx_probs, vp9_reader *r) {
|
||||
int i, j;
|
||||
|
||||
for (i = 0; i < TX_SIZE_CONTEXTS; ++i)
|
||||
for (j = 0; j < TX_SIZES - 3; ++j)
|
||||
for (j = 0; j < 1; ++j)
|
||||
vp9_diff_update_prob(r, &tx_probs->p8x8[i][j]);
|
||||
|
||||
for (i = 0; i < TX_SIZE_CONTEXTS; ++i)
|
||||
for (j = 0; j < TX_SIZES - 2; ++j)
|
||||
for (j = 0; j < 2; ++j)
|
||||
vp9_diff_update_prob(r, &tx_probs->p16x16[i][j]);
|
||||
|
||||
for (i = 0; i < TX_SIZE_CONTEXTS; ++i)
|
||||
for (j = 0; j < TX_SIZES - 1; ++j)
|
||||
for (j = 0; j < 3; ++j)
|
||||
vp9_diff_update_prob(r, &tx_probs->p32x32[i][j]);
|
||||
|
||||
#if CONFIG_TX64X64
|
||||
for (i = 0; i < TX_SIZE_CONTEXTS; ++i)
|
||||
for (j = 0; j < 4; ++j)
|
||||
vp9_diff_update_prob(r, &tx_probs->p64x64[i][j]);
|
||||
#endif
|
||||
}
|
||||
|
||||
static void read_switchable_interp_probs(FRAME_CONTEXT *fc, vp9_reader *r) {
|
||||
@ -220,6 +233,12 @@ static void inverse_transform_block(MACROBLOCKD* xd, int plane, int block,
|
||||
tx_type = DCT_DCT;
|
||||
vp9_highbd_idct32x32_add(dqcoeff, dst, stride, eob, xd->bd);
|
||||
break;
|
||||
#if CONFIG_TX64X64
|
||||
case TX_64X64:
|
||||
tx_type = DCT_DCT;
|
||||
vp9_highbd_idct64x64_add(dqcoeff, dst, stride, eob, xd->bd);
|
||||
break;
|
||||
#endif
|
||||
default:
|
||||
assert(0 && "Invalid transform size");
|
||||
}
|
||||
@ -247,6 +266,12 @@ static void inverse_transform_block(MACROBLOCKD* xd, int plane, int block,
|
||||
tx_type = DCT_DCT;
|
||||
vp9_idct32x32_add(dqcoeff, dst, stride, eob);
|
||||
break;
|
||||
#if CONFIG_TX64X64
|
||||
case TX_64X64:
|
||||
tx_type = DCT_DCT;
|
||||
vp9_idct64x64_add(dqcoeff, dst, stride, eob);
|
||||
break;
|
||||
#endif
|
||||
default:
|
||||
assert(0 && "Invalid transform size");
|
||||
return;
|
||||
@ -276,6 +301,12 @@ static void inverse_transform_block(MACROBLOCKD* xd, int plane, int block,
|
||||
tx_type = DCT_DCT;
|
||||
vp9_idct32x32_add(dqcoeff, dst, stride, eob);
|
||||
break;
|
||||
#if CONFIG_TX64X64
|
||||
case TX_64X64:
|
||||
tx_type = DCT_DCT;
|
||||
vp9_idct64x64_add(dqcoeff, dst, stride, eob);
|
||||
break;
|
||||
#endif
|
||||
default:
|
||||
assert(0 && "Invalid transform size");
|
||||
return;
|
||||
@ -321,7 +352,6 @@ static void predict_and_reconstruct_intra_block(int plane, int block,
|
||||
b_width_log2_lookup[plane_bsize], tx_size, mode,
|
||||
dst, pd->dst.stride, dst, pd->dst.stride,
|
||||
x, y, plane);
|
||||
|
||||
if (!mi->mbmi.skip) {
|
||||
const int eob = vp9_decode_block_tokens(cm, xd, plane, block,
|
||||
plane_bsize, x, y, tx_size,
|
||||
@ -701,14 +731,14 @@ static void setup_frame_size(VP9_COMMON *cm, struct vp9_read_bit_buffer *rb) {
|
||||
setup_display_size(cm, rb);
|
||||
|
||||
if (vp9_realloc_frame_buffer(
|
||||
get_frame_new_buffer(cm), cm->width, cm->height,
|
||||
cm->subsampling_x, cm->subsampling_y,
|
||||
get_frame_new_buffer(cm), cm->width, cm->height,
|
||||
cm->subsampling_x, cm->subsampling_y,
|
||||
#if CONFIG_VP9_HIGHBITDEPTH
|
||||
cm->use_highbitdepth,
|
||||
cm->use_highbitdepth,
|
||||
#endif
|
||||
VP9_DEC_BORDER_IN_PIXELS,
|
||||
&cm->frame_bufs[cm->new_fb_idx].raw_frame_buffer, cm->get_fb_cb,
|
||||
cm->cb_priv)) {
|
||||
VP9_DEC_BORDER_IN_PIXELS,
|
||||
&cm->frame_bufs[cm->new_fb_idx].raw_frame_buffer, cm->get_fb_cb,
|
||||
cm->cb_priv)) {
|
||||
vpx_internal_error(&cm->error, VPX_CODEC_MEM_ERROR,
|
||||
"Failed to allocate frame buffer");
|
||||
}
|
||||
@ -779,14 +809,14 @@ static void setup_frame_size_with_refs(VP9_COMMON *cm,
|
||||
setup_display_size(cm, rb);
|
||||
|
||||
if (vp9_realloc_frame_buffer(
|
||||
get_frame_new_buffer(cm), cm->width, cm->height,
|
||||
cm->subsampling_x, cm->subsampling_y,
|
||||
get_frame_new_buffer(cm), cm->width, cm->height,
|
||||
cm->subsampling_x, cm->subsampling_y,
|
||||
#if CONFIG_VP9_HIGHBITDEPTH
|
||||
cm->use_highbitdepth,
|
||||
cm->use_highbitdepth,
|
||||
#endif
|
||||
VP9_DEC_BORDER_IN_PIXELS,
|
||||
&cm->frame_bufs[cm->new_fb_idx].raw_frame_buffer, cm->get_fb_cb,
|
||||
cm->cb_priv)) {
|
||||
VP9_DEC_BORDER_IN_PIXELS,
|
||||
&cm->frame_bufs[cm->new_fb_idx].raw_frame_buffer, cm->get_fb_cb,
|
||||
cm->cb_priv)) {
|
||||
vpx_internal_error(&cm->error, VPX_CODEC_MEM_ERROR,
|
||||
"Failed to allocate frame buffer");
|
||||
}
|
||||
|
@ -65,8 +65,14 @@ static TX_SIZE read_selected_tx_size(VP9_COMMON *cm, MACROBLOCKD *xd,
|
||||
int tx_size = vp9_read(r, tx_probs[0]);
|
||||
if (tx_size != TX_4X4 && max_tx_size >= TX_16X16) {
|
||||
tx_size += vp9_read(r, tx_probs[1]);
|
||||
if (tx_size != TX_8X8 && max_tx_size >= TX_32X32)
|
||||
if (tx_size != TX_8X8 && max_tx_size >= TX_32X32) {
|
||||
tx_size += vp9_read(r, tx_probs[2]);
|
||||
#if CONFIG_TX64X64
|
||||
if (tx_size != TX_16X16 && max_tx_size >= TX_64X64) {
|
||||
tx_size += vp9_read(r, tx_probs[3]);
|
||||
}
|
||||
#endif
|
||||
}
|
||||
}
|
||||
|
||||
if (!cm->frame_parallel_decoding_mode)
|
||||
|
@ -32,7 +32,7 @@
|
||||
#define INCREMENT_COUNT(token) \
|
||||
do { \
|
||||
if (!cm->frame_parallel_decoding_mode) \
|
||||
++coef_counts[band][ctx][token]; \
|
||||
++coef_counts[band][ctx][token]; \
|
||||
} while (0)
|
||||
|
||||
static INLINE int read_coeff(const vp9_prob *probs, int n, vp9_reader *r) {
|
||||
@ -69,9 +69,9 @@ static int decode_coefs(VP9_COMMON *cm, const MACROBLOCKD *xd, PLANE_TYPE type,
|
||||
counts->coef[tx_size][type][ref];
|
||||
unsigned int (*eob_branch_count)[COEFF_CONTEXTS] =
|
||||
counts->eob_branch[tx_size][type][ref];
|
||||
uint8_t token_cache[32 * 32];
|
||||
uint8_t token_cache[MAX_NUM_COEFS];
|
||||
const uint8_t *band_translate = get_band_translate(tx_size);
|
||||
const int dq_shift = (tx_size == TX_32X32);
|
||||
const int dq_shift = (tx_size > TX_16X16) ? tx_size - TX_16X16 : 0;
|
||||
int v, token;
|
||||
int16_t dqv = dq[0];
|
||||
const uint8_t *cat1_prob;
|
||||
@ -214,6 +214,9 @@ int vp9_decode_block_tokens(VP9_COMMON *cm, MACROBLOCKD *xd,
|
||||
const int eob = decode_coefs(cm, xd, pd->plane_type,
|
||||
BLOCK_OFFSET(pd->dqcoeff, block), tx_size,
|
||||
pd->dequant, ctx, so->scan, so->neighbors, r);
|
||||
#if CONFIG_TX64X64
|
||||
if (plane > 0) assert(tx_size != TX_64X64);
|
||||
#endif
|
||||
vp9_set_contexts(xd, pd, plane_bsize, tx_size, eob > 0, x, y);
|
||||
return eob;
|
||||
}
|
||||
|
@ -88,8 +88,13 @@ static void write_selected_tx_size(const VP9_COMMON *cm,
|
||||
vp9_write(w, tx_size != TX_4X4, tx_probs[0]);
|
||||
if (tx_size != TX_4X4 && max_tx_size >= TX_16X16) {
|
||||
vp9_write(w, tx_size != TX_8X8, tx_probs[1]);
|
||||
if (tx_size != TX_8X8 && max_tx_size >= TX_32X32)
|
||||
if (tx_size != TX_8X8 && max_tx_size >= TX_32X32) {
|
||||
vp9_write(w, tx_size != TX_16X16, tx_probs[2]);
|
||||
#if CONFIG_TX64X64
|
||||
if (tx_size != TX_16X16 && max_tx_size >= TX_64X64)
|
||||
vp9_write(w, tx_size != TX_32X32, tx_probs[3]);
|
||||
#endif
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@ -684,7 +689,7 @@ static void update_coef_probs(VP9_COMP *cpi, vp9_writer* w) {
|
||||
vp9_coeff_stats frame_branch_ct[TX_SIZES][PLANE_TYPES];
|
||||
vp9_coeff_probs_model frame_coef_probs[TX_SIZES][PLANE_TYPES];
|
||||
|
||||
for (tx_size = TX_4X4; tx_size <= TX_32X32; ++tx_size)
|
||||
for (tx_size = TX_4X4; tx_size <= max_tx_size; ++tx_size)
|
||||
build_tree_distribution(cpi, tx_size, frame_branch_ct[tx_size],
|
||||
frame_coef_probs[tx_size]);
|
||||
|
||||
@ -815,37 +820,60 @@ static void encode_segmentation(VP9_COMMON *cm, MACROBLOCKD *xd,
|
||||
|
||||
static void encode_txfm_probs(VP9_COMMON *cm, vp9_writer *w) {
|
||||
// Mode
|
||||
#if CONFIG_TX64X64
|
||||
if (cm->tx_mode == ALLOW_16X16 || cm->tx_mode == ALLOW_32X32) {
|
||||
vp9_write_literal(w, 2, 2);
|
||||
vp9_write_bit(w, cm->tx_mode == ALLOW_32X32);
|
||||
} else if (cm->tx_mode == ALLOW_64X64 || cm->tx_mode == TX_MODE_SELECT) {
|
||||
vp9_write_literal(w, 3, 2);
|
||||
vp9_write_bit(w, cm->tx_mode == TX_MODE_SELECT);
|
||||
} else {
|
||||
vp9_write_literal(w, cm->tx_mode, 2);
|
||||
}
|
||||
#else
|
||||
vp9_write_literal(w, MIN(cm->tx_mode, ALLOW_32X32), 2);
|
||||
if (cm->tx_mode >= ALLOW_32X32)
|
||||
vp9_write_bit(w, cm->tx_mode == TX_MODE_SELECT);
|
||||
#endif // CONFIG_TX64X64
|
||||
|
||||
// Probabilities
|
||||
if (cm->tx_mode == TX_MODE_SELECT) {
|
||||
int i, j;
|
||||
unsigned int ct_8x8p[TX_SIZES - 3][2];
|
||||
unsigned int ct_16x16p[TX_SIZES - 2][2];
|
||||
unsigned int ct_32x32p[TX_SIZES - 1][2];
|
||||
|
||||
unsigned int ct_8x8p[1][2];
|
||||
unsigned int ct_16x16p[2][2];
|
||||
unsigned int ct_32x32p[3][2];
|
||||
#if CONFIG_TX64X64
|
||||
unsigned int ct_64x64p[4][2];
|
||||
#endif
|
||||
|
||||
for (i = 0; i < TX_SIZE_CONTEXTS; i++) {
|
||||
tx_counts_to_branch_counts_8x8(cm->counts.tx.p8x8[i], ct_8x8p);
|
||||
for (j = 0; j < TX_SIZES - 3; j++)
|
||||
for (j = 0; j < 1; j++)
|
||||
vp9_cond_prob_diff_update(w, &cm->fc.tx_probs.p8x8[i][j], ct_8x8p[j]);
|
||||
}
|
||||
|
||||
for (i = 0; i < TX_SIZE_CONTEXTS; i++) {
|
||||
tx_counts_to_branch_counts_16x16(cm->counts.tx.p16x16[i], ct_16x16p);
|
||||
for (j = 0; j < TX_SIZES - 2; j++)
|
||||
for (j = 0; j < 2; j++)
|
||||
vp9_cond_prob_diff_update(w, &cm->fc.tx_probs.p16x16[i][j],
|
||||
ct_16x16p[j]);
|
||||
}
|
||||
|
||||
for (i = 0; i < TX_SIZE_CONTEXTS; i++) {
|
||||
tx_counts_to_branch_counts_32x32(cm->counts.tx.p32x32[i], ct_32x32p);
|
||||
for (j = 0; j < TX_SIZES - 1; j++)
|
||||
for (j = 0; j < 3; j++)
|
||||
vp9_cond_prob_diff_update(w, &cm->fc.tx_probs.p32x32[i][j],
|
||||
ct_32x32p[j]);
|
||||
}
|
||||
|
||||
#if CONFIG_TX64X64
|
||||
for (i = 0; i < TX_SIZE_CONTEXTS; i++) {
|
||||
tx_counts_to_branch_counts_64x64(cm->counts.tx.p64x64[i], ct_64x64p);
|
||||
for (j = 0; j < 4; j++)
|
||||
vp9_cond_prob_diff_update(w, &cm->fc.tx_probs.p64x64[i][j],
|
||||
ct_64x64p[j]);
|
||||
}
|
||||
#endif // CONFIG_TX64X64
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -1439,6 +1439,458 @@ void vp9_fdct32x32_rd_c(const int16_t *input, tran_low_t *out, int stride) {
|
||||
}
|
||||
}
|
||||
|
||||
#if CONFIG_TX64X64
|
||||
// TODO(debargha): Using a floating point implementation for now.
|
||||
// Should re-use the 32x32 integer dct we already have.
|
||||
static void dct32_1d(double *input, double *output, int stride) {
|
||||
static const double C1 = 0.998795456205; // cos(pi * 1 / 64)
|
||||
static const double C2 = 0.995184726672; // cos(pi * 2 / 64)
|
||||
static const double C3 = 0.989176509965; // cos(pi * 3 / 64)
|
||||
static const double C4 = 0.980785280403; // cos(pi * 4 / 64)
|
||||
static const double C5 = 0.970031253195; // cos(pi * 5 / 64)
|
||||
static const double C6 = 0.956940335732; // cos(pi * 6 / 64)
|
||||
static const double C7 = 0.941544065183; // cos(pi * 7 / 64)
|
||||
static const double C8 = 0.923879532511; // cos(pi * 8 / 64)
|
||||
static const double C9 = 0.903989293123; // cos(pi * 9 / 64)
|
||||
static const double C10 = 0.881921264348; // cos(pi * 10 / 64)
|
||||
static const double C11 = 0.857728610000; // cos(pi * 11 / 64)
|
||||
static const double C12 = 0.831469612303; // cos(pi * 12 / 64)
|
||||
static const double C13 = 0.803207531481; // cos(pi * 13 / 64)
|
||||
static const double C14 = 0.773010453363; // cos(pi * 14 / 64)
|
||||
static const double C15 = 0.740951125355; // cos(pi * 15 / 64)
|
||||
static const double C16 = 0.707106781187; // cos(pi * 16 / 64)
|
||||
static const double C17 = 0.671558954847; // cos(pi * 17 / 64)
|
||||
static const double C18 = 0.634393284164; // cos(pi * 18 / 64)
|
||||
static const double C19 = 0.595699304492; // cos(pi * 19 / 64)
|
||||
static const double C20 = 0.555570233020; // cos(pi * 20 / 64)
|
||||
static const double C21 = 0.514102744193; // cos(pi * 21 / 64)
|
||||
static const double C22 = 0.471396736826; // cos(pi * 22 / 64)
|
||||
static const double C23 = 0.427555093430; // cos(pi * 23 / 64)
|
||||
static const double C24 = 0.382683432365; // cos(pi * 24 / 64)
|
||||
static const double C25 = 0.336889853392; // cos(pi * 25 / 64)
|
||||
static const double C26 = 0.290284677254; // cos(pi * 26 / 64)
|
||||
static const double C27 = 0.242980179903; // cos(pi * 27 / 64)
|
||||
static const double C28 = 0.195090322016; // cos(pi * 28 / 64)
|
||||
static const double C29 = 0.146730474455; // cos(pi * 29 / 64)
|
||||
static const double C30 = 0.098017140330; // cos(pi * 30 / 64)
|
||||
static const double C31 = 0.049067674327; // cos(pi * 31 / 64)
|
||||
|
||||
double step[32];
|
||||
|
||||
// Stage 1
|
||||
step[0] = input[stride*0] + input[stride*(32 - 1)];
|
||||
step[1] = input[stride*1] + input[stride*(32 - 2)];
|
||||
step[2] = input[stride*2] + input[stride*(32 - 3)];
|
||||
step[3] = input[stride*3] + input[stride*(32 - 4)];
|
||||
step[4] = input[stride*4] + input[stride*(32 - 5)];
|
||||
step[5] = input[stride*5] + input[stride*(32 - 6)];
|
||||
step[6] = input[stride*6] + input[stride*(32 - 7)];
|
||||
step[7] = input[stride*7] + input[stride*(32 - 8)];
|
||||
step[8] = input[stride*8] + input[stride*(32 - 9)];
|
||||
step[9] = input[stride*9] + input[stride*(32 - 10)];
|
||||
step[10] = input[stride*10] + input[stride*(32 - 11)];
|
||||
step[11] = input[stride*11] + input[stride*(32 - 12)];
|
||||
step[12] = input[stride*12] + input[stride*(32 - 13)];
|
||||
step[13] = input[stride*13] + input[stride*(32 - 14)];
|
||||
step[14] = input[stride*14] + input[stride*(32 - 15)];
|
||||
step[15] = input[stride*15] + input[stride*(32 - 16)];
|
||||
step[16] = -input[stride*16] + input[stride*(32 - 17)];
|
||||
step[17] = -input[stride*17] + input[stride*(32 - 18)];
|
||||
step[18] = -input[stride*18] + input[stride*(32 - 19)];
|
||||
step[19] = -input[stride*19] + input[stride*(32 - 20)];
|
||||
step[20] = -input[stride*20] + input[stride*(32 - 21)];
|
||||
step[21] = -input[stride*21] + input[stride*(32 - 22)];
|
||||
step[22] = -input[stride*22] + input[stride*(32 - 23)];
|
||||
step[23] = -input[stride*23] + input[stride*(32 - 24)];
|
||||
step[24] = -input[stride*24] + input[stride*(32 - 25)];
|
||||
step[25] = -input[stride*25] + input[stride*(32 - 26)];
|
||||
step[26] = -input[stride*26] + input[stride*(32 - 27)];
|
||||
step[27] = -input[stride*27] + input[stride*(32 - 28)];
|
||||
step[28] = -input[stride*28] + input[stride*(32 - 29)];
|
||||
step[29] = -input[stride*29] + input[stride*(32 - 30)];
|
||||
step[30] = -input[stride*30] + input[stride*(32 - 31)];
|
||||
step[31] = -input[stride*31] + input[stride*(32 - 32)];
|
||||
|
||||
// Stage 2
|
||||
output[stride*0] = step[0] + step[16 - 1];
|
||||
output[stride*1] = step[1] + step[16 - 2];
|
||||
output[stride*2] = step[2] + step[16 - 3];
|
||||
output[stride*3] = step[3] + step[16 - 4];
|
||||
output[stride*4] = step[4] + step[16 - 5];
|
||||
output[stride*5] = step[5] + step[16 - 6];
|
||||
output[stride*6] = step[6] + step[16 - 7];
|
||||
output[stride*7] = step[7] + step[16 - 8];
|
||||
output[stride*8] = -step[8] + step[16 - 9];
|
||||
output[stride*9] = -step[9] + step[16 - 10];
|
||||
output[stride*10] = -step[10] + step[16 - 11];
|
||||
output[stride*11] = -step[11] + step[16 - 12];
|
||||
output[stride*12] = -step[12] + step[16 - 13];
|
||||
output[stride*13] = -step[13] + step[16 - 14];
|
||||
output[stride*14] = -step[14] + step[16 - 15];
|
||||
output[stride*15] = -step[15] + step[16 - 16];
|
||||
|
||||
output[stride*16] = step[16];
|
||||
output[stride*17] = step[17];
|
||||
output[stride*18] = step[18];
|
||||
output[stride*19] = step[19];
|
||||
|
||||
output[stride*20] = (-step[20] + step[27])*C16;
|
||||
output[stride*21] = (-step[21] + step[26])*C16;
|
||||
output[stride*22] = (-step[22] + step[25])*C16;
|
||||
output[stride*23] = (-step[23] + step[24])*C16;
|
||||
|
||||
output[stride*24] = (step[24] + step[23])*C16;
|
||||
output[stride*25] = (step[25] + step[22])*C16;
|
||||
output[stride*26] = (step[26] + step[21])*C16;
|
||||
output[stride*27] = (step[27] + step[20])*C16;
|
||||
|
||||
output[stride*28] = step[28];
|
||||
output[stride*29] = step[29];
|
||||
output[stride*30] = step[30];
|
||||
output[stride*31] = step[31];
|
||||
|
||||
// Stage 3
|
||||
step[0] = output[stride*0] + output[stride*(8 - 1)];
|
||||
step[1] = output[stride*1] + output[stride*(8 - 2)];
|
||||
step[2] = output[stride*2] + output[stride*(8 - 3)];
|
||||
step[3] = output[stride*3] + output[stride*(8 - 4)];
|
||||
step[4] = -output[stride*4] + output[stride*(8 - 5)];
|
||||
step[5] = -output[stride*5] + output[stride*(8 - 6)];
|
||||
step[6] = -output[stride*6] + output[stride*(8 - 7)];
|
||||
step[7] = -output[stride*7] + output[stride*(8 - 8)];
|
||||
step[8] = output[stride*8];
|
||||
step[9] = output[stride*9];
|
||||
step[10] = (-output[stride*10] + output[stride*13])*C16;
|
||||
step[11] = (-output[stride*11] + output[stride*12])*C16;
|
||||
step[12] = (output[stride*12] + output[stride*11])*C16;
|
||||
step[13] = (output[stride*13] + output[stride*10])*C16;
|
||||
step[14] = output[stride*14];
|
||||
step[15] = output[stride*15];
|
||||
|
||||
step[16] = output[stride*16] + output[stride*23];
|
||||
step[17] = output[stride*17] + output[stride*22];
|
||||
step[18] = output[stride*18] + output[stride*21];
|
||||
step[19] = output[stride*19] + output[stride*20];
|
||||
step[20] = -output[stride*20] + output[stride*19];
|
||||
step[21] = -output[stride*21] + output[stride*18];
|
||||
step[22] = -output[stride*22] + output[stride*17];
|
||||
step[23] = -output[stride*23] + output[stride*16];
|
||||
step[24] = -output[stride*24] + output[stride*31];
|
||||
step[25] = -output[stride*25] + output[stride*30];
|
||||
step[26] = -output[stride*26] + output[stride*29];
|
||||
step[27] = -output[stride*27] + output[stride*28];
|
||||
step[28] = output[stride*28] + output[stride*27];
|
||||
step[29] = output[stride*29] + output[stride*26];
|
||||
step[30] = output[stride*30] + output[stride*25];
|
||||
step[31] = output[stride*31] + output[stride*24];
|
||||
|
||||
// Stage 4
|
||||
output[stride*0] = step[0] + step[3];
|
||||
output[stride*1] = step[1] + step[2];
|
||||
output[stride*2] = -step[2] + step[1];
|
||||
output[stride*3] = -step[3] + step[0];
|
||||
output[stride*4] = step[4];
|
||||
output[stride*5] = (-step[5] + step[6])*C16;
|
||||
output[stride*6] = (step[6] + step[5])*C16;
|
||||
output[stride*7] = step[7];
|
||||
output[stride*8] = step[8] + step[11];
|
||||
output[stride*9] = step[9] + step[10];
|
||||
output[stride*10] = -step[10] + step[9];
|
||||
output[stride*11] = -step[11] + step[8];
|
||||
output[stride*12] = -step[12] + step[15];
|
||||
output[stride*13] = -step[13] + step[14];
|
||||
output[stride*14] = step[14] + step[13];
|
||||
output[stride*15] = step[15] + step[12];
|
||||
|
||||
output[stride*16] = step[16];
|
||||
output[stride*17] = step[17];
|
||||
output[stride*18] = step[18]*-C8 + step[29]*C24;
|
||||
output[stride*19] = step[19]*-C8 + step[28]*C24;
|
||||
output[stride*20] = step[20]*-C24 + step[27]*-C8;
|
||||
output[stride*21] = step[21]*-C24 + step[26]*-C8;
|
||||
output[stride*22] = step[22];
|
||||
output[stride*23] = step[23];
|
||||
output[stride*24] = step[24];
|
||||
output[stride*25] = step[25];
|
||||
output[stride*26] = step[26]*C24 + step[21]*-C8;
|
||||
output[stride*27] = step[27]*C24 + step[20]*-C8;
|
||||
output[stride*28] = step[28]*C8 + step[19]*C24;
|
||||
output[stride*29] = step[29]*C8 + step[18]*C24;
|
||||
output[stride*30] = step[30];
|
||||
output[stride*31] = step[31];
|
||||
|
||||
// Stage 5
|
||||
step[0] = (output[stride*0] + output[stride*1]) * C16;
|
||||
step[1] = (-output[stride*1] + output[stride*0]) * C16;
|
||||
step[2] = output[stride*2]*C24 + output[stride*3] * C8;
|
||||
step[3] = output[stride*3]*C24 - output[stride*2] * C8;
|
||||
step[4] = output[stride*4] + output[stride*5];
|
||||
step[5] = -output[stride*5] + output[stride*4];
|
||||
step[6] = -output[stride*6] + output[stride*7];
|
||||
step[7] = output[stride*7] + output[stride*6];
|
||||
step[8] = output[stride*8];
|
||||
step[9] = output[stride*9]*-C8 + output[stride*14]*C24;
|
||||
step[10] = output[stride*10]*-C24 + output[stride*13]*-C8;
|
||||
step[11] = output[stride*11];
|
||||
step[12] = output[stride*12];
|
||||
step[13] = output[stride*13]*C24 + output[stride*10]*-C8;
|
||||
step[14] = output[stride*14]*C8 + output[stride*9]*C24;
|
||||
step[15] = output[stride*15];
|
||||
|
||||
step[16] = output[stride*16] + output[stride*19];
|
||||
step[17] = output[stride*17] + output[stride*18];
|
||||
step[18] = -output[stride*18] + output[stride*17];
|
||||
step[19] = -output[stride*19] + output[stride*16];
|
||||
step[20] = -output[stride*20] + output[stride*23];
|
||||
step[21] = -output[stride*21] + output[stride*22];
|
||||
step[22] = output[stride*22] + output[stride*21];
|
||||
step[23] = output[stride*23] + output[stride*20];
|
||||
step[24] = output[stride*24] + output[stride*27];
|
||||
step[25] = output[stride*25] + output[stride*26];
|
||||
step[26] = -output[stride*26] + output[stride*25];
|
||||
step[27] = -output[stride*27] + output[stride*24];
|
||||
step[28] = -output[stride*28] + output[stride*31];
|
||||
step[29] = -output[stride*29] + output[stride*30];
|
||||
step[30] = output[stride*30] + output[stride*29];
|
||||
step[31] = output[stride*31] + output[stride*28];
|
||||
|
||||
// Stage 6
|
||||
output[stride*0] = step[0];
|
||||
output[stride*1] = step[1];
|
||||
output[stride*2] = step[2];
|
||||
output[stride*3] = step[3];
|
||||
output[stride*4] = step[4]*C28 + step[7]*C4;
|
||||
output[stride*5] = step[5]*C12 + step[6]*C20;
|
||||
output[stride*6] = step[6]*C12 + step[5]*-C20;
|
||||
output[stride*7] = step[7]*C28 + step[4]*-C4;
|
||||
output[stride*8] = step[8] + step[9];
|
||||
output[stride*9] = -step[9] + step[8];
|
||||
output[stride*10] = -step[10] + step[11];
|
||||
output[stride*11] = step[11] + step[10];
|
||||
output[stride*12] = step[12] + step[13];
|
||||
output[stride*13] = -step[13] + step[12];
|
||||
output[stride*14] = -step[14] + step[15];
|
||||
output[stride*15] = step[15] + step[14];
|
||||
|
||||
output[stride*16] = step[16];
|
||||
output[stride*17] = step[17]*-C4 + step[30]*C28;
|
||||
output[stride*18] = step[18]*-C28 + step[29]*-C4;
|
||||
output[stride*19] = step[19];
|
||||
output[stride*20] = step[20];
|
||||
output[stride*21] = step[21]*-C20 + step[26]*C12;
|
||||
output[stride*22] = step[22]*-C12 + step[25]*-C20;
|
||||
output[stride*23] = step[23];
|
||||
output[stride*24] = step[24];
|
||||
output[stride*25] = step[25]*C12 + step[22]*-C20;
|
||||
output[stride*26] = step[26]*C20 + step[21]*C12;
|
||||
output[stride*27] = step[27];
|
||||
output[stride*28] = step[28];
|
||||
output[stride*29] = step[29]*C28 + step[18]*-C4;
|
||||
output[stride*30] = step[30]*C4 + step[17]*C28;
|
||||
output[stride*31] = step[31];
|
||||
|
||||
// Stage 7
|
||||
step[0] = output[stride*0];
|
||||
step[1] = output[stride*1];
|
||||
step[2] = output[stride*2];
|
||||
step[3] = output[stride*3];
|
||||
step[4] = output[stride*4];
|
||||
step[5] = output[stride*5];
|
||||
step[6] = output[stride*6];
|
||||
step[7] = output[stride*7];
|
||||
step[8] = output[stride*8]*C30 + output[stride*15]*C2;
|
||||
step[9] = output[stride*9]*C14 + output[stride*14]*C18;
|
||||
step[10] = output[stride*10]*C22 + output[stride*13]*C10;
|
||||
step[11] = output[stride*11]*C6 + output[stride*12]*C26;
|
||||
step[12] = output[stride*12]*C6 + output[stride*11]*-C26;
|
||||
step[13] = output[stride*13]*C22 + output[stride*10]*-C10;
|
||||
step[14] = output[stride*14]*C14 + output[stride*9]*-C18;
|
||||
step[15] = output[stride*15]*C30 + output[stride*8]*-C2;
|
||||
|
||||
step[16] = output[stride*16] + output[stride*17];
|
||||
step[17] = -output[stride*17] + output[stride*16];
|
||||
step[18] = -output[stride*18] + output[stride*19];
|
||||
step[19] = output[stride*19] + output[stride*18];
|
||||
step[20] = output[stride*20] + output[stride*21];
|
||||
step[21] = -output[stride*21] + output[stride*20];
|
||||
step[22] = -output[stride*22] + output[stride*23];
|
||||
step[23] = output[stride*23] + output[stride*22];
|
||||
step[24] = output[stride*24] + output[stride*25];
|
||||
step[25] = -output[stride*25] + output[stride*24];
|
||||
step[26] = -output[stride*26] + output[stride*27];
|
||||
step[27] = output[stride*27] + output[stride*26];
|
||||
step[28] = output[stride*28] + output[stride*29];
|
||||
step[29] = -output[stride*29] + output[stride*28];
|
||||
step[30] = -output[stride*30] + output[stride*31];
|
||||
step[31] = output[stride*31] + output[stride*30];
|
||||
|
||||
// Final stage --- outputs indices are bit-reversed.
|
||||
output[stride*0] = step[0];
|
||||
output[stride*16] = step[1];
|
||||
output[stride*8] = step[2];
|
||||
output[stride*24] = step[3];
|
||||
output[stride*4] = step[4];
|
||||
output[stride*20] = step[5];
|
||||
output[stride*12] = step[6];
|
||||
output[stride*28] = step[7];
|
||||
output[stride*2] = step[8];
|
||||
output[stride*18] = step[9];
|
||||
output[stride*10] = step[10];
|
||||
output[stride*26] = step[11];
|
||||
output[stride*6] = step[12];
|
||||
output[stride*22] = step[13];
|
||||
output[stride*14] = step[14];
|
||||
output[stride*30] = step[15];
|
||||
|
||||
output[stride*1] = step[16]*C31 + step[31]*C1;
|
||||
output[stride*17] = step[17]*C15 + step[30]*C17;
|
||||
output[stride*9] = step[18]*C23 + step[29]*C9;
|
||||
output[stride*25] = step[19]*C7 + step[28]*C25;
|
||||
output[stride*5] = step[20]*C27 + step[27]*C5;
|
||||
output[stride*21] = step[21]*C11 + step[26]*C21;
|
||||
output[stride*13] = step[22]*C19 + step[25]*C13;
|
||||
output[stride*29] = step[23]*C3 + step[24]*C29;
|
||||
output[stride*3] = step[24]*C3 + step[23]*-C29;
|
||||
output[stride*19] = step[25]*C19 + step[22]*-C13;
|
||||
output[stride*11] = step[26]*C11 + step[21]*-C21;
|
||||
output[stride*27] = step[27]*C27 + step[20]*-C5;
|
||||
output[stride*7] = step[28]*C7 + step[19]*-C25;
|
||||
output[stride*23] = step[29]*C23 + step[18]*-C9;
|
||||
output[stride*15] = step[30]*C15 + step[17]*-C17;
|
||||
output[stride*31] = step[31]*C31 + step[16]*-C1;
|
||||
}
|
||||
|
||||
// Reference (double-precision) 64-point 1-D forward DCT.
//
// Uses the classic even/odd recursive decomposition: the 64-point DCT is
// split into two 32-point problems (handled by dct32_1d) built from the
// symmetric sums and the cosine-weighted antisymmetric differences of the
// input.  Even-indexed outputs come directly from the first 32-point DCT;
// odd-indexed outputs are recovered from the second one through the
// recurrence X[2k+1] = 2*G[k] - X[2k-1].
//
// input/output : arrays of 64 samples, addressed as input[stride * n].
// stride       : element step between consecutive samples, letting the
//                same routine process either rows or columns in place.
static void dct64_1d(double *input, double *output, int stride) {
  double step1[64], step2[64];
  int i;
  // C[n] = cos(n * pi / 128), the twiddle factors for a 64-point DCT.
  static const double C[64] = {
    1.00000000000000000000,  // cos(0 * pi / 128)
    0.99969881869620424997,  // cos(1 * pi / 128)
    0.99879545620517240501,  // cos(2 * pi / 128)
    0.99729045667869020697,  // cos(3 * pi / 128)
    0.99518472667219692873,  // cos(4 * pi / 128)
    0.99247953459870996706,  // cos(5 * pi / 128)
    0.98917650996478101444,  // cos(6 * pi / 128)
    0.98527764238894122162,  // cos(7 * pi / 128)
    0.98078528040323043058,  // cos(8 * pi / 128)
    0.97570213003852857003,  // cos(9 * pi / 128)
    0.97003125319454397424,  // cos(10 * pi / 128)
    0.96377606579543984022,  // cos(11 * pi / 128)
    0.95694033573220882438,  // cos(12 * pi / 128)
    0.94952818059303667475,  // cos(13 * pi / 128)
    0.94154406518302080631,  // cos(14 * pi / 128)
    0.93299279883473895669,  // cos(15 * pi / 128)
    0.92387953251128673848,  // cos(16 * pi / 128)
    0.91420975570353069095,  // cos(17 * pi / 128)
    0.90398929312344333820,  // cos(18 * pi / 128)
    0.89322430119551532446,  // cos(19 * pi / 128)
    0.88192126434835504956,  // cos(20 * pi / 128)
    0.87008699110871146054,  // cos(21 * pi / 128)
    0.85772861000027211809,  // cos(22 * pi / 128)
    0.84485356524970711689,  // cos(23 * pi / 128)
    0.83146961230254523567,  // cos(24 * pi / 128)
    0.81758481315158371139,  // cos(25 * pi / 128)
    0.80320753148064494287,  // cos(26 * pi / 128)
    0.78834642762660633863,  // cos(27 * pi / 128)
    0.77301045336273699338,  // cos(28 * pi / 128)
    0.75720884650648456748,  // cos(29 * pi / 128)
    0.74095112535495921691,  // cos(30 * pi / 128)
    0.72424708295146700276,  // cos(31 * pi / 128)
    0.70710678118654757274,  // cos(32 * pi / 128)
    0.68954054473706694051,  // cos(33 * pi / 128)
    0.67155895484701844111,  // cos(34 * pi / 128)
    0.65317284295377686654,  // cos(35 * pi / 128)
    0.63439328416364559882,  // cos(36 * pi / 128)
    0.61523159058062693028,  // cos(37 * pi / 128)
    0.59569930449243346793,  // cos(38 * pi / 128)
    0.57580819141784544968,  // cos(39 * pi / 128)
    0.55557023301960228867,  // cos(40 * pi / 128)
    0.53499761988709737537,  // cos(41 * pi / 128)
    0.51410274419322177231,  // cos(42 * pi / 128)
    0.49289819222978414892,  // cos(43 * pi / 128)
    0.47139673682599780857,  // cos(44 * pi / 128)
    0.44961132965460659516,  // cos(45 * pi / 128)
    0.42755509343028219593,  // cos(46 * pi / 128)
    0.40524131400498980549,  // cos(47 * pi / 128)
    0.38268343236508983729,  // cos(48 * pi / 128)
    0.35989503653498827740,  // cos(49 * pi / 128)
    0.33688985339222005111,  // cos(50 * pi / 128)
    0.31368174039889151761,  // cos(51 * pi / 128)
    0.29028467725446227554,  // cos(52 * pi / 128)
    0.26671275747489842090,  // cos(53 * pi / 128)
    0.24298017990326398197,  // cos(54 * pi / 128)
    0.21910124015686976984,  // cos(55 * pi / 128)
    0.19509032201612830359,  // cos(56 * pi / 128)
    0.17096188876030135595,  // cos(57 * pi / 128)
    0.14673047445536174793,  // cos(58 * pi / 128)
    0.12241067519921627893,  // cos(59 * pi / 128)
    0.09801714032956077016,  // cos(60 * pi / 128)
    0.07356456359966745406,  // cos(61 * pi / 128)
    0.04906767432741813290,  // cos(62 * pi / 128)
    0.02454122852291226731,  // cos(63 * pi / 128)
  };

  // Split into symmetric sums (first half) and cosine-weighted
  // antisymmetric differences (second half).
  for (i = 0; i < 32; ++i) {
    step1[i] = input[stride * i] + input[stride * (63 - i)];
    step1[32 + i] = (input[stride * i] -
                     input[stride * (63 - i)]) * C[i * 2 + 1];
  }

  // Two 32-point sub-transforms on the contiguous halves (stride 1).
  dct32_1d(step1, step2, 1);
  dct32_1d(step1 + 32, step2 + 32, 1);

  // Even outputs come straight from the first sub-transform.
  for (i = 0; i < 64; i += 2) {
    output[stride*i] = step2[i / 2];
  }
  // Odd outputs via the recurrence X[2k+1] = 2*G[k] - X[2k-1];
  // the base case uses C[32] = cos(pi/4).
  output[stride * 1] = 2 * step2[32] * C[32];
  for (i = 3; i < 64; i += 2) {
    output[stride * i] = 2 * step2[32 + i / 2] - output[stride * (i - 2)];
  }
}
|
||||
|
||||
// Reference forward 64x64 2-D DCT (double precision, separable).
// Applies dct64_1d to every column, then to every row, then rescales
// and rounds the result into the integer coefficient buffer.
void vp9_fdct64x64_c(const int16_t *input, tran_low_t *out, int stride) {
  // vp9_clear_system_state(); // Make it simd safe : __asm emms;
  {
    double buf[4096];  // 64x64 intermediate, row-major. NOTE(review):
                       // 32 KiB of stack — confirm acceptable on all targets.
    int row, col, k;

    // Pass 1: transform each column into buf.
    for (col = 0; col < 64; ++col) {
      double col_in[64], col_out[64];
      for (k = 0; k < 64; ++k)
        col_in[k] = input[k * stride + col];
      dct64_1d(col_in, col_out, 1);
      for (k = 0; k < 64; ++k)
        buf[k * 64 + col] = col_out[k];
    }

    // Pass 2: transform each row of buf in place.
    for (row = 0; row < 64; ++row) {
      double row_in[64], row_out[64];
      for (k = 0; k < 64; ++k)
        row_in[k] = buf[row * 64 + k];
      dct64_1d(row_in, row_out, 1);
      for (k = 0; k < 64; ++k)
        buf[row * 64 + k] = row_out[k];
    }

    // Normalize (divide by 16) and round to the nearest integer.
    for (k = 0; k < 4096; ++k) {
      out[k] = (tran_low_t)round(buf[k] / 16);
    }
  }
  // vp9_clear_system_state(); // Make it simd safe : __asm emms;
}
|
||||
|
||||
// DC-only forward 64x64 transform: produces just the DC coefficient
// (scaled sum of all 4096 input samples) and zeroes the next coefficient.
// Used on paths that only need the DC term.
void vp9_fdct64x64_1_c(const int16_t *input, tran_low_t *output, int stride) {
  int r, c;
  // Accumulate in a plain int: 4096 * INT16_MAX fits comfortably in 32
  // bits, whereas tran_low_t may be only 16 bits in non-highbitdepth
  // builds and would overflow.  Matches vp9_fdct32x32_1_c, which also
  // uses an int accumulator.
  int sum = 0;
  for (r = 0; r < 64; ++r)
    for (c = 0; c < 64; ++c)
      sum += input[r * stride + c];

  output[0] = (tran_low_t)(sum >> 5);
  output[1] = 0;
}
|
||||
#endif
|
||||
|
||||
#if CONFIG_VP9_HIGHBITDEPTH
|
||||
void vp9_highbd_fdct4x4_c(const int16_t *input, tran_low_t *output,
|
||||
int stride) {
|
||||
@ -1498,4 +1950,15 @@ void vp9_highbd_fdct32x32_rd_c(const int16_t *input, tran_low_t *out,
|
||||
int stride) {
|
||||
vp9_fdct32x32_rd_c(input, out, stride);
|
||||
}
|
||||
|
||||
#if CONFIG_TX64X64
|
||||
// High-bitdepth DC-only 64x64 forward transform.  Currently just forwards
// to the 8-bit reference implementation; the DC sum is bit-depth agnostic
// at this stage.
void vp9_highbd_fdct64x64_1_c(const int16_t *input, tran_low_t *out,
                              int stride) {
  vp9_fdct64x64_1_c(input, out, stride);
}
|
||||
|
||||
// High-bitdepth forward 64x64 transform.  Placeholder that forwards to the
// double-precision reference path (same pattern as the highbd 32x32
// wrappers above); no bit-depth-specific scaling is applied here.
void vp9_highbd_fdct64x64_c(const int16_t *input, tran_low_t *out, int stride) {
  vp9_fdct64x64_c(input, out, stride);
}
|
||||
#endif // CONFIG_TX64X64
|
||||
#endif // CONFIG_VP9_HIGHBITDEPTH
|
||||
|
@ -661,11 +661,23 @@ static void update_state(VP9_COMP *cpi, PICK_MODE_CONTEXT *ctx,
|
||||
|
||||
// FIXME(rbultje) I'm pretty sure this should go to the end of this block
|
||||
// (i.e. after the output_enabled)
|
||||
#if CONFIG_TX64X64
|
||||
if (bsize < BLOCK_64X64) {
|
||||
if (bsize < BLOCK_32X32) {
|
||||
if (bsize < BLOCK_16X16) {
|
||||
ctx->tx_rd_diff[ALLOW_16X16] = ctx->tx_rd_diff[ALLOW_8X8];
|
||||
}
|
||||
ctx->tx_rd_diff[ALLOW_32X32] = ctx->tx_rd_diff[ALLOW_16X16];
|
||||
}
|
||||
ctx->tx_rd_diff[ALLOW_64X64] = ctx->tx_rd_diff[ALLOW_32X32];
|
||||
}
|
||||
#else
|
||||
if (bsize < BLOCK_32X32) {
|
||||
if (bsize < BLOCK_16X16)
|
||||
ctx->tx_rd_diff[ALLOW_16X16] = ctx->tx_rd_diff[ALLOW_8X8];
|
||||
ctx->tx_rd_diff[ALLOW_32X32] = ctx->tx_rd_diff[ALLOW_16X16];
|
||||
}
|
||||
#endif
|
||||
|
||||
if (is_inter_block(mbmi) && mbmi->sb_type < BLOCK_8X8) {
|
||||
mbmi->mv[0].as_int = mi->bmi[3].as_mv[0].as_int;
|
||||
@ -2581,8 +2593,8 @@ static void encode_rd_sb_row(VP9_COMP *cpi, const TileInfo *const tile,
|
||||
set_fixed_partitioning(cpi, tile, mi, mi_row, mi_col, bsize);
|
||||
rd_use_partition(cpi, tile, mi, tp, mi_row, mi_col, BLOCK_64X64,
|
||||
&dummy_rate, &dummy_dist, 1, cpi->pc_root);
|
||||
} else if (sf->partition_search_type == VAR_BASED_PARTITION &&
|
||||
cm->frame_type != KEY_FRAME ) {
|
||||
} else if (sf->partition_search_type == VAR_BASED_PARTITION &&
|
||||
cm->frame_type != KEY_FRAME ) {
|
||||
choose_partitioning(cpi, tile, mi_row, mi_col);
|
||||
rd_use_partition(cpi, tile, mi, tp, mi_row, mi_col, BLOCK_64X64,
|
||||
&dummy_rate, &dummy_dist, 1, cpi->pc_root);
|
||||
@ -2678,7 +2690,11 @@ static TX_MODE select_tx_mode(const VP9_COMP *cpi) {
|
||||
if (cpi->mb.e_mbd.lossless)
|
||||
return ONLY_4X4;
|
||||
if (cpi->sf.tx_size_search_method == USE_LARGESTALL)
|
||||
#if CONFIG_TX64X64
|
||||
return ALLOW_64X64;
|
||||
#else
|
||||
return ALLOW_32X32;
|
||||
#endif
|
||||
else if (cpi->sf.tx_size_search_method == USE_FULL_RD||
|
||||
cpi->sf.tx_size_search_method == USE_TX_8X8)
|
||||
return TX_MODE_SELECT;
|
||||
@ -3404,9 +3420,9 @@ static void encode_frame_internal(VP9_COMP *cpi) {
|
||||
|
||||
#if CONFIG_VP9_HIGHBITDEPTH
|
||||
if (cm->use_highbitdepth)
|
||||
x->fwd_txm4x4 = xd->lossless ? vp9_fwht4x4 : vp9_fdct4x4;
|
||||
else
|
||||
x->fwd_txm4x4 = xd->lossless ? vp9_highbd_fwht4x4 : vp9_highbd_fdct4x4;
|
||||
else
|
||||
x->fwd_txm4x4 = xd->lossless ? vp9_fwht4x4 : vp9_fdct4x4;
|
||||
x->highbd_itxm_add = xd->lossless ? vp9_highbd_iwht4x4_add :
|
||||
vp9_highbd_idct4x4_add;
|
||||
#else
|
||||
@ -3581,41 +3597,99 @@ void vp9_encode_frame(VP9_COMP *cpi) {
|
||||
}
|
||||
}
|
||||
|
||||
#if CONFIG_TX64X64
|
||||
if (cm->tx_mode == TX_MODE_SELECT) {
|
||||
int count4x4 = 0;
|
||||
int count8x8_lp = 0, count8x8_8x8p = 0;
|
||||
int count4x4_lp = 0;
|
||||
int count8x8_8x8p = 0, count8x8_lp = 0;
|
||||
int count16x16_16x16p = 0, count16x16_lp = 0;
|
||||
int count32x32 = 0;
|
||||
int count32x32_32x32p = 0, count32x32_lp = 0;
|
||||
int count64x64_64x64p = 0;
|
||||
|
||||
for (i = 0; i < TX_SIZE_CONTEXTS; ++i) {
|
||||
count4x4 += cm->counts.tx.p32x32[i][TX_4X4];
|
||||
count4x4 += cm->counts.tx.p16x16[i][TX_4X4];
|
||||
count4x4 += cm->counts.tx.p8x8[i][TX_4X4];
|
||||
count4x4_lp += cm->counts.tx.p64x64[i][TX_4X4];
|
||||
count4x4_lp += cm->counts.tx.p32x32[i][TX_4X4];
|
||||
count4x4_lp += cm->counts.tx.p16x16[i][TX_4X4];
|
||||
count4x4_lp += cm->counts.tx.p8x8[i][TX_4X4];
|
||||
|
||||
count8x8_lp += cm->counts.tx.p64x64[i][TX_8X8];
|
||||
count8x8_lp += cm->counts.tx.p32x32[i][TX_8X8];
|
||||
count8x8_lp += cm->counts.tx.p16x16[i][TX_8X8];
|
||||
count8x8_8x8p += cm->counts.tx.p8x8[i][TX_8X8];
|
||||
|
||||
count16x16_lp += cm->counts.tx.p64x64[i][TX_16X16];
|
||||
count16x16_lp += cm->counts.tx.p32x32[i][TX_16X16];
|
||||
count16x16_16x16p += cm->counts.tx.p16x16[i][TX_16X16];
|
||||
|
||||
count32x32_lp += cm->counts.tx.p64x64[i][TX_32X32];
|
||||
count32x32_32x32p += cm->counts.tx.p32x32[i][TX_32X32];
|
||||
|
||||
count64x64_64x64p += cm->counts.tx.p64x64[i][TX_64X64];
|
||||
}
|
||||
|
||||
if (count4x4_lp == 0 && count16x16_lp == 0 && count16x16_16x16p == 0 &&
|
||||
count32x32_lp == 0 && count32x32_32x32p == 0 &&
|
||||
count64x64_64x64p == 0) {
|
||||
cm->tx_mode = ALLOW_8X8;
|
||||
reset_skip_tx_size(cm, TX_8X8);
|
||||
} else if (count8x8_8x8p == 0 && count8x8_lp == 0 &&
|
||||
count16x16_16x16p == 0 && count16x16_lp == 0 &&
|
||||
count32x32_32x32p == 0 && count32x32_lp == 0 &&
|
||||
count64x64_64x64p == 0) {
|
||||
cm->tx_mode = ONLY_4X4;
|
||||
reset_skip_tx_size(cm, TX_4X4);
|
||||
} else if (count4x4_lp == 0 && count8x8_lp == 0 && count16x16_lp == 0 &&
|
||||
count32x32_lp == 0) {
|
||||
cm->tx_mode = ALLOW_64X64;
|
||||
} else if (count4x4_lp == 0 && count8x8_lp == 0 && count16x16_lp == 0 &&
|
||||
count64x64_64x64p == 0) {
|
||||
cm->tx_mode = ALLOW_32X32;
|
||||
reset_skip_tx_size(cm, TX_32X32);
|
||||
} else if (count4x4_lp == 0 && count8x8_lp == 0 &&
|
||||
count32x32_lp == 0 && count32x32_32x32p == 0 &&
|
||||
count64x64_64x64p == 0) {
|
||||
cm->tx_mode = ALLOW_16X16;
|
||||
reset_skip_tx_size(cm, TX_16X16);
|
||||
}
|
||||
}
|
||||
#else
|
||||
if (cm->tx_mode == TX_MODE_SELECT) {
|
||||
int count4x4_lp = 0;
|
||||
int count8x8_8x8p = 0, count8x8_lp = 0;
|
||||
int count16x16_16x16p = 0, count16x16_lp = 0;
|
||||
int count32x32_32x32p = 0;
|
||||
|
||||
for (i = 0; i < TX_SIZE_CONTEXTS; ++i) {
|
||||
count4x4_lp += cm->counts.tx.p32x32[i][TX_4X4];
|
||||
count4x4_lp += cm->counts.tx.p16x16[i][TX_4X4];
|
||||
count4x4_lp += cm->counts.tx.p8x8[i][TX_4X4];
|
||||
|
||||
count8x8_lp += cm->counts.tx.p32x32[i][TX_8X8];
|
||||
count8x8_lp += cm->counts.tx.p16x16[i][TX_8X8];
|
||||
count8x8_8x8p += cm->counts.tx.p8x8[i][TX_8X8];
|
||||
|
||||
count16x16_16x16p += cm->counts.tx.p16x16[i][TX_16X16];
|
||||
count16x16_lp += cm->counts.tx.p32x32[i][TX_16X16];
|
||||
count32x32 += cm->counts.tx.p32x32[i][TX_32X32];
|
||||
count16x16_16x16p += cm->counts.tx.p16x16[i][TX_16X16];
|
||||
count32x32_32x32p += cm->counts.tx.p32x32[i][TX_32X32];
|
||||
}
|
||||
|
||||
if (count4x4 == 0 && count16x16_lp == 0 && count16x16_16x16p == 0 &&
|
||||
count32x32 == 0) {
|
||||
if (count4x4_lp == 0 && count16x16_lp == 0 && count16x16_16x16p == 0 &&
|
||||
count32x32_32x32p == 0) {
|
||||
cm->tx_mode = ALLOW_8X8;
|
||||
reset_skip_tx_size(cm, TX_8X8);
|
||||
} else if (count8x8_8x8p == 0 && count16x16_16x16p == 0 &&
|
||||
count8x8_lp == 0 && count16x16_lp == 0 && count32x32 == 0) {
|
||||
count8x8_lp == 0 && count16x16_lp == 0 &&
|
||||
count32x32_32x32p == 0) {
|
||||
cm->tx_mode = ONLY_4X4;
|
||||
reset_skip_tx_size(cm, TX_4X4);
|
||||
} else if (count8x8_lp == 0 && count16x16_lp == 0 && count4x4 == 0) {
|
||||
} else if (count8x8_lp == 0 && count16x16_lp == 0 && count4x4_lp == 0) {
|
||||
cm->tx_mode = ALLOW_32X32;
|
||||
} else if (count32x32 == 0 && count8x8_lp == 0 && count4x4 == 0) {
|
||||
} else if (count32x32_32x32p == 0 && count8x8_lp == 0 &&
|
||||
count4x4_lp == 0) {
|
||||
cm->tx_mode = ALLOW_16X16;
|
||||
reset_skip_tx_size(cm, TX_16X16);
|
||||
}
|
||||
}
|
||||
#endif
|
||||
} else {
|
||||
cm->reference_mode = SINGLE_REFERENCE;
|
||||
encode_frame_internal(cpi);
|
||||
|
@ -135,16 +135,16 @@ static int optimize_b(MACROBLOCK *mb, int plane, int block,
|
||||
struct macroblock_plane *const p = &mb->plane[plane];
|
||||
struct macroblockd_plane *const pd = &xd->plane[plane];
|
||||
const int ref = is_inter_block(&xd->mi[0].src_mi->mbmi);
|
||||
vp9_token_state tokens[1025][2];
|
||||
unsigned best_index[1025][2];
|
||||
uint8_t token_cache[1024];
|
||||
vp9_token_state tokens[MAX_NUM_COEFS + 1][2];
|
||||
unsigned best_index[MAX_NUM_COEFS + 1][2];
|
||||
uint8_t token_cache[MAX_NUM_COEFS];
|
||||
const tran_low_t *const coeff = BLOCK_OFFSET(mb->plane[plane].coeff, block);
|
||||
tran_low_t *const qcoeff = BLOCK_OFFSET(p->qcoeff, block);
|
||||
tran_low_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);
|
||||
const int eob = p->eobs[block];
|
||||
const PLANE_TYPE type = pd->plane_type;
|
||||
const int default_eob = 16 << (tx_size << 1);
|
||||
const int mul = 1 + (tx_size == TX_32X32);
|
||||
const int mul = 1 << (tx_size >= TX_32X32 ? tx_size - TX_16X16 : 0);
|
||||
const int16_t *dequant_ptr = pd->dequant;
|
||||
const uint8_t *const band_translate = get_band_translate(tx_size);
|
||||
const scan_order *const so = get_scan(xd, tx_size, type, block);
|
||||
@ -392,6 +392,16 @@ void vp9_xform_quant_fp(MACROBLOCK *x, int plane, int block,
|
||||
#if CONFIG_VP9_HIGHBITDEPTH
|
||||
if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
|
||||
switch (tx_size) {
|
||||
#if CONFIG_TX64X64
|
||||
case TX_64X64:
|
||||
vp9_highbd_fdct64x64(src_diff, coeff, diff_stride);
|
||||
vp9_highbd_quantize_fp_64x64(coeff, 4096, x->skip_block, p->zbin,
|
||||
p->round_fp, p->quant_fp, p->quant_shift,
|
||||
qcoeff, dqcoeff, pd->dequant,
|
||||
p->zbin_extra, eob, scan_order->scan,
|
||||
scan_order->iscan);
|
||||
break;
|
||||
#endif // CONFIG_TX64X64
|
||||
case TX_32X32:
|
||||
highbd_fdct32x32(x->use_lp32x32fdct, src_diff, coeff, diff_stride);
|
||||
vp9_highbd_quantize_fp_32x32(coeff, 1024, x->skip_block, p->zbin,
|
||||
@ -429,6 +439,15 @@ void vp9_xform_quant_fp(MACROBLOCK *x, int plane, int block,
|
||||
#endif // CONFIG_VP9_HIGHBITDEPTH
|
||||
|
||||
switch (tx_size) {
|
||||
#if CONFIG_TX64X64
|
||||
case TX_64X64:
|
||||
vp9_fdct64x64(src_diff, coeff, diff_stride);
|
||||
vp9_quantize_fp_64x64(coeff, 4096, x->skip_block, p->zbin, p->round_fp,
|
||||
p->quant_fp, p->quant_shift, qcoeff, dqcoeff,
|
||||
pd->dequant, p->zbin_extra, eob, scan_order->scan,
|
||||
scan_order->iscan);
|
||||
break;
|
||||
#endif // CONFIG_TX64X64
|
||||
case TX_32X32:
|
||||
fdct32x32(x->use_lp32x32fdct, src_diff, coeff, diff_stride);
|
||||
vp9_quantize_fp_32x32(coeff, 1024, x->skip_block, p->zbin, p->round_fp,
|
||||
@ -482,6 +501,14 @@ void vp9_xform_quant_dc(MACROBLOCK *x, int plane, int block,
|
||||
#if CONFIG_VP9_HIGHBITDEPTH
|
||||
if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
|
||||
switch (tx_size) {
|
||||
#if CONFIG_TX64X64
|
||||
case TX_64X64:
|
||||
vp9_highbd_fdct64x64_1(src_diff, coeff, diff_stride);
|
||||
vp9_highbd_quantize_dc_64x64(coeff, x->skip_block, p->round,
|
||||
p->quant_fp[0], qcoeff, dqcoeff,
|
||||
pd->dequant[0], eob);
|
||||
break;
|
||||
#endif // CONFIG_TX64X64
|
||||
case TX_32X32:
|
||||
vp9_highbd_fdct32x32_1(src_diff, coeff, diff_stride);
|
||||
vp9_highbd_quantize_dc_32x32(coeff, x->skip_block, p->round,
|
||||
@ -514,6 +541,14 @@ void vp9_xform_quant_dc(MACROBLOCK *x, int plane, int block,
|
||||
#endif // CONFIG_VP9_HIGHBITDEPTH
|
||||
|
||||
switch (tx_size) {
|
||||
#if CONFIG_TX64X64
|
||||
case TX_64X64:
|
||||
vp9_fdct64x64_1(src_diff, coeff, diff_stride);
|
||||
vp9_quantize_dc_64x64(coeff, x->skip_block, p->round,
|
||||
p->quant_fp[0], qcoeff, dqcoeff,
|
||||
pd->dequant[0], eob);
|
||||
break;
|
||||
#endif // CONFIG_TX64X64
|
||||
case TX_32X32:
|
||||
vp9_fdct32x32_1(src_diff, coeff, diff_stride);
|
||||
vp9_quantize_dc_32x32(coeff, x->skip_block, p->round,
|
||||
@ -563,6 +598,15 @@ void vp9_xform_quant(MACROBLOCK *x, int plane, int block,
|
||||
#if CONFIG_VP9_HIGHBITDEPTH
|
||||
if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
|
||||
switch (tx_size) {
|
||||
#if CONFIG_TX64X64
|
||||
case TX_64X64:
|
||||
vp9_highbd_fdct64x64(src_diff, coeff, diff_stride);
|
||||
vp9_highbd_quantize_b_64x64(coeff, 4096, x->skip_block, p->zbin,
|
||||
p->round, p->quant, p->quant_shift, qcoeff,
|
||||
dqcoeff, pd->dequant, p->zbin_extra, eob,
|
||||
scan_order->scan, scan_order->iscan);
|
||||
break;
|
||||
#endif // CONFIG_TX64X64
|
||||
case TX_32X32:
|
||||
highbd_fdct32x32(x->use_lp32x32fdct, src_diff, coeff, diff_stride);
|
||||
vp9_highbd_quantize_b_32x32(coeff, 1024, x->skip_block, p->zbin,
|
||||
@ -599,6 +643,15 @@ void vp9_xform_quant(MACROBLOCK *x, int plane, int block,
|
||||
#endif // CONFIG_VP9_HIGHBITDEPTH
|
||||
|
||||
switch (tx_size) {
|
||||
#if CONFIG_TX64X64
|
||||
case TX_64X64:
|
||||
vp9_fdct64x64(src_diff, coeff, diff_stride);
|
||||
vp9_quantize_b_64x64(coeff, 4096, x->skip_block, p->zbin, p->round,
|
||||
p->quant, p->quant_shift, qcoeff, dqcoeff,
|
||||
pd->dequant, p->zbin_extra, eob, scan_order->scan,
|
||||
scan_order->iscan);
|
||||
break;
|
||||
#endif // CONFIG_TX64X64
|
||||
case TX_32X32:
|
||||
fdct32x32(x->use_lp32x32fdct, src_diff, coeff, diff_stride);
|
||||
vp9_quantize_b_32x32(coeff, 1024, x->skip_block, p->zbin, p->round,
|
||||
@ -649,6 +702,7 @@ static void encode_block(int plane, int block, BLOCK_SIZE plane_bsize,
|
||||
dst = &pd->dst.buf[4 * j * pd->dst.stride + 4 * i];
|
||||
a = &ctx->ta[plane][i];
|
||||
l = &ctx->tl[plane][j];
|
||||
if (plane) assert(tx_size != TX_64X64);
|
||||
|
||||
// TODO(jingning): per transformed block zero forcing only enabled for
|
||||
// luma component. will integrate chroma components as well.
|
||||
@ -695,6 +749,12 @@ static void encode_block(int plane, int block, BLOCK_SIZE plane_bsize,
|
||||
#if CONFIG_VP9_HIGHBITDEPTH
|
||||
if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
|
||||
switch (tx_size) {
|
||||
#if CONFIG_TX64X64
|
||||
case TX_64X64:
|
||||
vp9_highbd_idct64x64_add(dqcoeff, dst, pd->dst.stride,
|
||||
p->eobs[block], xd->bd);
|
||||
break;
|
||||
#endif // CONFIG_TX64X64
|
||||
case TX_32X32:
|
||||
vp9_highbd_idct32x32_add(dqcoeff, dst, pd->dst.stride,
|
||||
p->eobs[block], xd->bd);
|
||||
@ -722,6 +782,11 @@ static void encode_block(int plane, int block, BLOCK_SIZE plane_bsize,
|
||||
#endif // CONFIG_VP9_HIGHBITDEPTH
|
||||
|
||||
switch (tx_size) {
|
||||
#if CONFIG_TX64X64
|
||||
case TX_64X64:
|
||||
vp9_idct64x64_add(dqcoeff, dst, pd->dst.stride, p->eobs[block]);
|
||||
break;
|
||||
#endif
|
||||
case TX_32X32:
|
||||
vp9_idct32x32_add(dqcoeff, dst, pd->dst.stride, p->eobs[block]);
|
||||
break;
|
||||
@ -832,6 +897,29 @@ static void encode_block_intra(int plane, int block, BLOCK_SIZE plane_bsize,
|
||||
#if CONFIG_VP9_HIGHBITDEPTH
|
||||
if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
|
||||
switch (tx_size) {
|
||||
#if CONFIG_TX64X64
|
||||
case TX_64X64:
|
||||
scan_order = &vp9_default_scan_orders[TX_64X64];
|
||||
mode = plane == 0 ? mbmi->mode : mbmi->uv_mode;
|
||||
vp9_predict_intra_block(xd, block >> 8, bwl, TX_64X64, mode,
|
||||
x->skip_encode ? src : dst,
|
||||
x->skip_encode ? src_stride : dst_stride,
|
||||
dst, dst_stride, i, j, plane);
|
||||
if (!x->skip_recode) {
|
||||
vp9_highbd_subtract_block(64, 64, src_diff, diff_stride,
|
||||
src, src_stride, dst, dst_stride, xd->bd);
|
||||
vp9_highbd_fdct64x64(src_diff, coeff, diff_stride);
|
||||
vp9_highbd_quantize_b_64x64(coeff, 4096, x->skip_block, p->zbin,
|
||||
p->round, p->quant, p->quant_shift,
|
||||
qcoeff, dqcoeff, pd->dequant,
|
||||
p->zbin_extra, eob,
|
||||
scan_order->scan, scan_order->iscan);
|
||||
if (!x->skip_encode && *eob) {
|
||||
vp9_highbd_idct64x64_add(dqcoeff, dst, dst_stride, *eob, xd->bd);
|
||||
}
|
||||
}
|
||||
break;
|
||||
#endif // CONFIG_TX64X64
|
||||
case TX_32X32:
|
||||
scan_order = &vp9_default_scan_orders[TX_32X32];
|
||||
mode = plane == 0 ? mbmi->mode : mbmi->uv_mode;
|
||||
@ -941,6 +1029,28 @@ static void encode_block_intra(int plane, int block, BLOCK_SIZE plane_bsize,
|
||||
#endif // CONFIG_VP9_HIGHBITDEPTH
|
||||
|
||||
switch (tx_size) {
|
||||
#if CONFIG_TX64X64
|
||||
case TX_64X64:
|
||||
assert(plane == 0);
|
||||
scan_order = &vp9_default_scan_orders[TX_64X64];
|
||||
mode = plane == 0 ? mbmi->mode : mbmi->uv_mode;
|
||||
vp9_predict_intra_block(xd, block >> 8, bwl, TX_64X64, mode,
|
||||
x->skip_encode ? src : dst,
|
||||
x->skip_encode ? src_stride : dst_stride,
|
||||
dst, dst_stride, i, j, plane);
|
||||
if (!x->skip_recode) {
|
||||
vp9_subtract_block(64, 64, src_diff, diff_stride,
|
||||
src, src_stride, dst, dst_stride);
|
||||
vp9_fdct64x64(src_diff, coeff, diff_stride);
|
||||
vp9_quantize_b_64x64(coeff, 4096, x->skip_block, p->zbin, p->round,
|
||||
p->quant, p->quant_shift, qcoeff, dqcoeff,
|
||||
pd->dequant, p->zbin_extra, eob, scan_order->scan,
|
||||
scan_order->iscan);
|
||||
}
|
||||
if (!x->skip_encode && *eob)
|
||||
vp9_idct64x64_add(dqcoeff, dst, dst_stride, *eob);
|
||||
break;
|
||||
#endif // CONFIG_TX64X64
|
||||
case TX_32X32:
|
||||
scan_order = &vp9_default_scan_orders[TX_32X32];
|
||||
mode = plane == 0 ? mbmi->mode : mbmi->uv_mode;
|
||||
|
@ -3136,7 +3136,7 @@ static void encode_frame_to_data_rate(VP9_COMP *cpi,
|
||||
release_scaled_references(cpi);
|
||||
vp9_update_reference_frames(cpi);
|
||||
|
||||
for (t = TX_4X4; t <= TX_32X32; t++)
|
||||
for (t = TX_4X4; t < TX_SIZES; t++)
|
||||
full_to_model_counts(cm->counts.coef[t], cpi->coef_counts[t]);
|
||||
|
||||
if (!cm->error_resilient_mode && !cm->frame_parallel_decoding_mode)
|
||||
|
@ -65,10 +65,15 @@ void vp9_highbd_quantize_dc(const tran_low_t *coeff_ptr, int skip_block,
|
||||
}
|
||||
#endif
|
||||
|
||||
void vp9_quantize_dc_32x32(const tran_low_t *coeff_ptr, int skip_block,
|
||||
const int16_t *round_ptr, const int16_t quant,
|
||||
tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr,
|
||||
const int16_t dequant_ptr, uint16_t *eob_ptr) {
|
||||
static INLINE void quantize_dc_bigtx(const tran_low_t *coeff_ptr,
|
||||
int skip_block,
|
||||
const int16_t *round_ptr,
|
||||
const int16_t quant,
|
||||
tran_low_t *qcoeff_ptr,
|
||||
tran_low_t *dqcoeff_ptr,
|
||||
const int16_t dequant_ptr,
|
||||
uint16_t *eob_ptr,
|
||||
int logsizeby32) {
|
||||
const int rc = 0;
|
||||
const int coeff = coeff_ptr[rc];
|
||||
const int coeff_sign = (coeff >> 31);
|
||||
@ -78,24 +83,43 @@ void vp9_quantize_dc_32x32(const tran_low_t *coeff_ptr, int skip_block,
|
||||
if (!skip_block) {
|
||||
|
||||
tmp = clamp(abs_coeff + round_ptr[rc != 0], INT16_MIN, INT16_MAX);
|
||||
tmp = (tmp * quant) >> 15;
|
||||
tmp = (tmp * quant) >> (15 - logsizeby32);
|
||||
qcoeff_ptr[rc] = (tmp ^ coeff_sign) - coeff_sign;
|
||||
dqcoeff_ptr[rc] = qcoeff_ptr[rc] * dequant_ptr / 2;
|
||||
dqcoeff_ptr[rc] = qcoeff_ptr[rc] * dequant_ptr / (2 << logsizeby32);
|
||||
if (tmp)
|
||||
eob = 0;
|
||||
}
|
||||
*eob_ptr = eob + 1;
|
||||
}
|
||||
|
||||
void vp9_quantize_dc_32x32(const tran_low_t *coeff_ptr, int skip_block,
|
||||
const int16_t *round_ptr, const int16_t quant,
|
||||
tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr,
|
||||
const int16_t dequant_ptr, uint16_t *eob_ptr) {
|
||||
quantize_dc_bigtx(coeff_ptr, skip_block, round_ptr, quant,
|
||||
qcoeff_ptr, dqcoeff_ptr, dequant_ptr, eob_ptr, 0);
|
||||
}
|
||||
|
||||
#if CONFIG_TX64X64
|
||||
void vp9_quantize_dc_64x64(const tran_low_t *coeff_ptr, int skip_block,
|
||||
const int16_t *round_ptr, const int16_t quant,
|
||||
tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr,
|
||||
const int16_t dequant_ptr, uint16_t *eob_ptr) {
|
||||
quantize_dc_bigtx(coeff_ptr, skip_block, round_ptr, quant,
|
||||
qcoeff_ptr, dqcoeff_ptr, dequant_ptr, eob_ptr, 1);
|
||||
}
|
||||
#endif // CONFIG_TX64X64
|
||||
|
||||
#if CONFIG_VP9_HIGHBITDEPTH
|
||||
void vp9_highbd_quantize_dc_32x32(const tran_low_t *coeff_ptr,
|
||||
int skip_block,
|
||||
const int16_t *round_ptr,
|
||||
const int16_t quant,
|
||||
tran_low_t *qcoeff_ptr,
|
||||
tran_low_t *dqcoeff_ptr,
|
||||
const int16_t dequant_ptr,
|
||||
uint16_t *eob_ptr) {
|
||||
static INLINE void highbd_quantize_dc_bigtx(const tran_low_t *coeff_ptr,
|
||||
int skip_block,
|
||||
const int16_t *round_ptr,
|
||||
const int16_t quant,
|
||||
tran_low_t *qcoeff_ptr,
|
||||
tran_low_t *dqcoeff_ptr,
|
||||
const int16_t dequant_ptr,
|
||||
uint16_t *eob_ptr,
|
||||
int logsizeby32) {
|
||||
int eob = -1;
|
||||
|
||||
if (!skip_block) {
|
||||
@ -106,15 +130,41 @@ void vp9_highbd_quantize_dc_32x32(const tran_low_t *coeff_ptr,
|
||||
|
||||
const int64_t tmp =
|
||||
(clamp(abs_coeff + round_ptr[rc != 0], INT32_MIN, INT32_MAX) *
|
||||
quant) >> 15;
|
||||
quant) >> (15 - logsizeby32);
|
||||
qcoeff_ptr[rc] = (tmp ^ coeff_sign) - coeff_sign;
|
||||
dqcoeff_ptr[rc] = qcoeff_ptr[rc] * dequant_ptr / 2;
|
||||
dqcoeff_ptr[rc] = qcoeff_ptr[rc] * dequant_ptr / (2 << logsizeby32);
|
||||
if (tmp)
|
||||
eob = 0;
|
||||
}
|
||||
*eob_ptr = eob + 1;
|
||||
}
|
||||
#endif
|
||||
|
||||
// High-bitdepth DC-only quantization for 32x32 blocks (64-bit intermediate
// math in the shared helper).  logsizeby32 = 0: baseline 32x32 scaling.
void vp9_highbd_quantize_dc_32x32(const tran_low_t *coeff_ptr,
                                  int skip_block,
                                  const int16_t *round_ptr,
                                  const int16_t quant,
                                  tran_low_t *qcoeff_ptr,
                                  tran_low_t *dqcoeff_ptr,
                                  const int16_t dequant_ptr,
                                  uint16_t *eob_ptr) {
  highbd_quantize_dc_bigtx(coeff_ptr, skip_block, round_ptr, quant,
                           qcoeff_ptr, dqcoeff_ptr, dequant_ptr, eob_ptr, 0);
}
|
||||
|
||||
#if CONFIG_TX64X64
|
||||
// High-bitdepth DC-only quantization for 64x64 blocks.  logsizeby32 = 1:
// one extra scaling step relative to the 32x32 baseline.
void vp9_highbd_quantize_dc_64x64(const tran_low_t *coeff_ptr,
                                  int skip_block,
                                  const int16_t *round_ptr,
                                  const int16_t quant,
                                  tran_low_t *qcoeff_ptr,
                                  tran_low_t *dqcoeff_ptr,
                                  const int16_t dequant_ptr,
                                  uint16_t *eob_ptr) {
  highbd_quantize_dc_bigtx(coeff_ptr, skip_block, round_ptr, quant,
                           qcoeff_ptr, dqcoeff_ptr, dequant_ptr, eob_ptr, 1);
}
|
||||
#endif // CONFIG_TX64X64
|
||||
#endif // CONFIG_VP9_HIGHBITDEPTH
|
||||
|
||||
void vp9_quantize_fp_c(const tran_low_t *coeff_ptr, intptr_t n_coeffs,
|
||||
int skip_block,
|
||||
@ -210,15 +260,21 @@ void vp9_highbd_quantize_fp_c(const tran_low_t *coeff_ptr,
|
||||
|
||||
// TODO(jingning) Refactor this file and combine functions with similar
|
||||
// operations.
|
||||
void vp9_quantize_fp_32x32_c(const tran_low_t *coeff_ptr, intptr_t n_coeffs,
|
||||
int skip_block,
|
||||
const int16_t *zbin_ptr, const int16_t *round_ptr,
|
||||
const int16_t *quant_ptr,
|
||||
const int16_t *quant_shift_ptr,
|
||||
tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr,
|
||||
const int16_t *dequant_ptr,
|
||||
int zbin_oq_value, uint16_t *eob_ptr,
|
||||
const int16_t *scan, const int16_t *iscan) {
|
||||
static INLINE void quantize_fp_bigtx(const tran_low_t *coeff_ptr,
|
||||
intptr_t n_coeffs,
|
||||
int skip_block,
|
||||
const int16_t *zbin_ptr,
|
||||
const int16_t *round_ptr,
|
||||
const int16_t *quant_ptr,
|
||||
const int16_t *quant_shift_ptr,
|
||||
tran_low_t *qcoeff_ptr,
|
||||
tran_low_t *dqcoeff_ptr,
|
||||
const int16_t *dequant_ptr,
|
||||
int zbin_oq_value,
|
||||
uint16_t *eob_ptr,
|
||||
const int16_t *scan,
|
||||
const int16_t *iscan,
|
||||
int logsizeby32) {
|
||||
int i, eob = -1;
|
||||
(void)zbin_ptr;
|
||||
(void)quant_shift_ptr;
|
||||
@ -236,12 +292,13 @@ void vp9_quantize_fp_32x32_c(const tran_low_t *coeff_ptr, intptr_t n_coeffs,
|
||||
int tmp = 0;
|
||||
int abs_coeff = (coeff ^ coeff_sign) - coeff_sign;
|
||||
|
||||
if (abs_coeff >= (dequant_ptr[rc != 0] >> 2)) {
|
||||
if (abs_coeff >= (dequant_ptr[rc != 0] >> (2 + logsizeby32))) {
|
||||
abs_coeff += ROUND_POWER_OF_TWO(round_ptr[rc != 0], 1);
|
||||
abs_coeff = clamp(abs_coeff, INT16_MIN, INT16_MAX);
|
||||
tmp = (abs_coeff * quant_ptr[rc != 0]) >> 15;
|
||||
tmp = (abs_coeff * quant_ptr[rc != 0]) >> (15 - logsizeby32);
|
||||
qcoeff_ptr[rc] = (tmp ^ coeff_sign) - coeff_sign;
|
||||
dqcoeff_ptr[rc] = qcoeff_ptr[rc] * dequant_ptr[rc != 0] / 2;
|
||||
dqcoeff_ptr[rc] = qcoeff_ptr[rc] * dequant_ptr[rc != 0] /
|
||||
(2 << logsizeby32);
|
||||
}
|
||||
|
||||
if (tmp)
|
||||
@ -251,18 +308,64 @@ void vp9_quantize_fp_32x32_c(const tran_low_t *coeff_ptr, intptr_t n_coeffs,
|
||||
*eob_ptr = eob + 1;
|
||||
}
|
||||
|
||||
// "fp" (fast-path, round/quant only — zbin and quant_shift are unused by
// the helper) quantization for 32x32 blocks.  Wrapper over the shared
// big-transform helper with logsizeby32 = 0.
void vp9_quantize_fp_32x32_c(const tran_low_t *coeff_ptr,
                             intptr_t n_coeffs,
                             int skip_block,
                             const int16_t *zbin_ptr,
                             const int16_t *round_ptr,
                             const int16_t *quant_ptr,
                             const int16_t *quant_shift_ptr,
                             tran_low_t *qcoeff_ptr,
                             tran_low_t *dqcoeff_ptr,
                             const int16_t *dequant_ptr,
                             int zbin_oq_value,
                             uint16_t *eob_ptr,
                             const int16_t *scan,
                             const int16_t *iscan) {
  quantize_fp_bigtx(coeff_ptr, n_coeffs, skip_block,
                    zbin_ptr, round_ptr, quant_ptr, quant_shift_ptr,
                    qcoeff_ptr, dqcoeff_ptr, dequant_ptr,
                    zbin_oq_value, eob_ptr, scan, iscan, 0);
}
|
||||
|
||||
#if CONFIG_TX64X64
|
||||
// "fp" quantization for 64x64 blocks.  Same shared helper as the 32x32
// version, with logsizeby32 = 1 to account for the larger transform's
// scaling (deeper quant shift, larger dequant divisor).
void vp9_quantize_fp_64x64_c(const tran_low_t *coeff_ptr,
                             intptr_t n_coeffs,
                             int skip_block,
                             const int16_t *zbin_ptr,
                             const int16_t *round_ptr,
                             const int16_t *quant_ptr,
                             const int16_t *quant_shift_ptr,
                             tran_low_t *qcoeff_ptr,
                             tran_low_t *dqcoeff_ptr,
                             const int16_t *dequant_ptr,
                             int zbin_oq_value,
                             uint16_t *eob_ptr,
                             const int16_t *scan,
                             const int16_t *iscan) {
  quantize_fp_bigtx(coeff_ptr, n_coeffs, skip_block,
                    zbin_ptr, round_ptr, quant_ptr, quant_shift_ptr,
                    qcoeff_ptr, dqcoeff_ptr, dequant_ptr,
                    zbin_oq_value, eob_ptr, scan, iscan, 1);
}
|
||||
#endif // CONFIG_TX64X64
|
||||
|
||||
#if CONFIG_VP9_HIGHBITDEPTH
|
||||
void vp9_highbd_quantize_fp_32x32_c(const tran_low_t *coeff_ptr,
|
||||
intptr_t n_coeffs, int skip_block,
|
||||
const int16_t *zbin_ptr,
|
||||
const int16_t *round_ptr,
|
||||
const int16_t *quant_ptr,
|
||||
const int16_t *quant_shift_ptr,
|
||||
tran_low_t *qcoeff_ptr,
|
||||
tran_low_t *dqcoeff_ptr,
|
||||
const int16_t *dequant_ptr,
|
||||
int zbin_oq_value, uint16_t *eob_ptr,
|
||||
const int16_t *scan, const int16_t *iscan) {
|
||||
static INLINE void highbd_quantize_fp_bigtx(const tran_low_t *coeff_ptr,
|
||||
intptr_t n_coeffs,
|
||||
int skip_block,
|
||||
const int16_t *zbin_ptr,
|
||||
const int16_t *round_ptr,
|
||||
const int16_t *quant_ptr,
|
||||
const int16_t *quant_shift_ptr,
|
||||
tran_low_t *qcoeff_ptr,
|
||||
tran_low_t *dqcoeff_ptr,
|
||||
const int16_t *dequant_ptr,
|
||||
int zbin_oq_value,
|
||||
uint16_t *eob_ptr,
|
||||
const int16_t *scan,
|
||||
const int16_t *iscan,
|
||||
int logsizeby32) {
|
||||
int i, eob = -1;
|
||||
(void)zbin_ptr;
|
||||
(void)quant_shift_ptr;
|
||||
@ -280,12 +383,13 @@ void vp9_highbd_quantize_fp_32x32_c(const tran_low_t *coeff_ptr,
|
||||
int64_t tmp = 0;
|
||||
const int abs_coeff = (coeff ^ coeff_sign) - coeff_sign;
|
||||
|
||||
if (abs_coeff >= (dequant_ptr[rc != 0] >> 2)) {
|
||||
if (abs_coeff >= (dequant_ptr[rc != 0] >> (2 + logsizeby32))) {
|
||||
tmp = clamp(abs_coeff + ROUND_POWER_OF_TWO(round_ptr[rc != 0], 1),
|
||||
INT32_MIN, INT32_MAX);
|
||||
tmp = (tmp * quant_ptr[rc != 0]) >> 15;
|
||||
tmp = (tmp * quant_ptr[rc != 0]) >> (15 - logsizeby32);
|
||||
qcoeff_ptr[rc] = (tmp ^ coeff_sign) - coeff_sign;
|
||||
dqcoeff_ptr[rc] = qcoeff_ptr[rc] * dequant_ptr[rc != 0] / 2;
|
||||
dqcoeff_ptr[rc] = qcoeff_ptr[rc] * dequant_ptr[rc != 0] /
|
||||
(2 << logsizeby32);
|
||||
}
|
||||
|
||||
if (tmp)
|
||||
@ -294,7 +398,49 @@ void vp9_highbd_quantize_fp_32x32_c(const tran_low_t *coeff_ptr,
|
||||
}
|
||||
*eob_ptr = eob + 1;
|
||||
}
|
||||
#endif
|
||||
|
||||
void vp9_highbd_quantize_fp_32x32_c(const tran_low_t *coeff_ptr,
|
||||
intptr_t n_coeffs,
|
||||
int skip_block,
|
||||
const int16_t *zbin_ptr,
|
||||
const int16_t *round_ptr,
|
||||
const int16_t *quant_ptr,
|
||||
const int16_t *quant_shift_ptr,
|
||||
tran_low_t *qcoeff_ptr,
|
||||
tran_low_t *dqcoeff_ptr,
|
||||
const int16_t *dequant_ptr,
|
||||
int zbin_oq_value,
|
||||
uint16_t *eob_ptr,
|
||||
const int16_t *scan,
|
||||
const int16_t *iscan) {
|
||||
highbd_quantize_fp_bigtx(coeff_ptr, n_coeffs, skip_block,
|
||||
zbin_ptr, round_ptr, quant_ptr, quant_shift_ptr,
|
||||
qcoeff_ptr, dqcoeff_ptr, dequant_ptr,
|
||||
zbin_oq_value, eob_ptr, scan, iscan, 0);
|
||||
}
|
||||
|
||||
#if CONFIG_TX64X64
|
||||
void vp9_highbd_quantize_fp_64x64_c(const tran_low_t *coeff_ptr,
|
||||
intptr_t n_coeffs,
|
||||
int skip_block,
|
||||
const int16_t *zbin_ptr,
|
||||
const int16_t *round_ptr,
|
||||
const int16_t *quant_ptr,
|
||||
const int16_t *quant_shift_ptr,
|
||||
tran_low_t *qcoeff_ptr,
|
||||
tran_low_t *dqcoeff_ptr,
|
||||
const int16_t *dequant_ptr,
|
||||
int zbin_oq_value,
|
||||
uint16_t *eob_ptr,
|
||||
const int16_t *scan,
|
||||
const int16_t *iscan) {
|
||||
highbd_quantize_fp_bigtx(coeff_ptr, n_coeffs, skip_block,
|
||||
zbin_ptr, round_ptr, quant_ptr, quant_shift_ptr,
|
||||
qcoeff_ptr, dqcoeff_ptr, dequant_ptr,
|
||||
zbin_oq_value, eob_ptr, scan, iscan, 1);
|
||||
}
|
||||
#endif // CONFIG_TX64X64
|
||||
#endif // CONFIG_VP9_HIGHBITDEPTH
|
||||
|
||||
void vp9_quantize_b_c(const tran_low_t *coeff_ptr, intptr_t n_coeffs,
|
||||
int skip_block,
|
||||
@ -403,23 +549,29 @@ void vp9_highbd_quantize_b_c(const tran_low_t *coeff_ptr, intptr_t n_coeffs,
|
||||
}
|
||||
*eob_ptr = eob + 1;
|
||||
}
|
||||
#endif
|
||||
#endif // CONFIG_VP9_HIGHBITDEPTH
|
||||
|
||||
void vp9_quantize_b_32x32_c(const tran_low_t *coeff_ptr, intptr_t n_coeffs,
|
||||
int skip_block,
|
||||
const int16_t *zbin_ptr, const int16_t *round_ptr,
|
||||
const int16_t *quant_ptr,
|
||||
const int16_t *quant_shift_ptr,
|
||||
tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr,
|
||||
const int16_t *dequant_ptr,
|
||||
int zbin_oq_value, uint16_t *eob_ptr,
|
||||
const int16_t *scan, const int16_t *iscan) {
|
||||
static INLINE void quantize_b_bigtx(const tran_low_t *coeff_ptr,
|
||||
intptr_t n_coeffs,
|
||||
int skip_block,
|
||||
const int16_t *zbin_ptr,
|
||||
const int16_t *round_ptr,
|
||||
const int16_t *quant_ptr,
|
||||
const int16_t *quant_shift_ptr,
|
||||
tran_low_t *qcoeff_ptr,
|
||||
tran_low_t *dqcoeff_ptr,
|
||||
const int16_t *dequant_ptr,
|
||||
int zbin_oq_value,
|
||||
uint16_t *eob_ptr,
|
||||
const int16_t *scan,
|
||||
const int16_t *iscan,
|
||||
int logsizeby32) {
|
||||
const int zbins[2] = { ROUND_POWER_OF_TWO(zbin_ptr[0] + zbin_oq_value, 1),
|
||||
ROUND_POWER_OF_TWO(zbin_ptr[1] + zbin_oq_value, 1) };
|
||||
const int nzbins[2] = {zbins[0] * -1, zbins[1] * -1};
|
||||
|
||||
int idx = 0;
|
||||
int idx_arr[1024];
|
||||
int idx_arr[MAX_NUM_COEFS];
|
||||
int i, eob = -1;
|
||||
(void)iscan;
|
||||
|
||||
@ -446,13 +598,14 @@ void vp9_quantize_b_32x32_c(const tran_low_t *coeff_ptr, intptr_t n_coeffs,
|
||||
const int coeff_sign = (coeff >> 31);
|
||||
int tmp;
|
||||
int abs_coeff = (coeff ^ coeff_sign) - coeff_sign;
|
||||
abs_coeff += ROUND_POWER_OF_TWO(round_ptr[rc != 0], 1);
|
||||
abs_coeff += ROUND_POWER_OF_TWO(round_ptr[rc != 0], (1 + logsizeby32));
|
||||
abs_coeff = clamp(abs_coeff, INT16_MIN, INT16_MAX);
|
||||
tmp = ((((abs_coeff * quant_ptr[rc != 0]) >> 16) + abs_coeff) *
|
||||
quant_shift_ptr[rc != 0]) >> 15;
|
||||
quant_shift_ptr[rc != 0]) >> (15 - logsizeby32);
|
||||
|
||||
qcoeff_ptr[rc] = (tmp ^ coeff_sign) - coeff_sign;
|
||||
dqcoeff_ptr[rc] = qcoeff_ptr[rc] * dequant_ptr[rc != 0] / 2;
|
||||
dqcoeff_ptr[rc] = qcoeff_ptr[rc] * dequant_ptr[rc != 0] /
|
||||
(2 << logsizeby32);
|
||||
|
||||
if (tmp)
|
||||
eob = idx_arr[i];
|
||||
@ -461,24 +614,70 @@ void vp9_quantize_b_32x32_c(const tran_low_t *coeff_ptr, intptr_t n_coeffs,
|
||||
*eob_ptr = eob + 1;
|
||||
}
|
||||
|
||||
void vp9_quantize_b_32x32_c(const tran_low_t *coeff_ptr,
|
||||
intptr_t n_coeffs,
|
||||
int skip_block,
|
||||
const int16_t *zbin_ptr,
|
||||
const int16_t *round_ptr,
|
||||
const int16_t *quant_ptr,
|
||||
const int16_t *quant_shift_ptr,
|
||||
tran_low_t *qcoeff_ptr,
|
||||
tran_low_t *dqcoeff_ptr,
|
||||
const int16_t *dequant_ptr,
|
||||
int zbin_oq_value,
|
||||
uint16_t *eob_ptr,
|
||||
const int16_t *scan,
|
||||
const int16_t *iscan) {
|
||||
quantize_b_bigtx(coeff_ptr, n_coeffs, skip_block,
|
||||
zbin_ptr, round_ptr, quant_ptr, quant_shift_ptr,
|
||||
qcoeff_ptr, dqcoeff_ptr, dequant_ptr,
|
||||
zbin_oq_value, eob_ptr, scan, iscan, 0);
|
||||
}
|
||||
|
||||
#if CONFIG_TX64X64
|
||||
void vp9_quantize_b_64x64_c(const tran_low_t *coeff_ptr,
|
||||
intptr_t n_coeffs,
|
||||
int skip_block,
|
||||
const int16_t *zbin_ptr,
|
||||
const int16_t *round_ptr,
|
||||
const int16_t *quant_ptr,
|
||||
const int16_t *quant_shift_ptr,
|
||||
tran_low_t *qcoeff_ptr,
|
||||
tran_low_t *dqcoeff_ptr,
|
||||
const int16_t *dequant_ptr,
|
||||
int zbin_oq_value,
|
||||
uint16_t *eob_ptr,
|
||||
const int16_t *scan,
|
||||
const int16_t *iscan) {
|
||||
quantize_b_bigtx(coeff_ptr, n_coeffs, skip_block,
|
||||
zbin_ptr, round_ptr, quant_ptr, quant_shift_ptr,
|
||||
qcoeff_ptr, dqcoeff_ptr, dequant_ptr,
|
||||
zbin_oq_value, eob_ptr, scan, iscan, 1);
|
||||
}
|
||||
#endif // CONFIG_TX64X64
|
||||
|
||||
#if CONFIG_VP9_HIGHBITDEPTH
|
||||
void vp9_highbd_quantize_b_32x32_c(const tran_low_t *coeff_ptr,
|
||||
intptr_t n_coeffs, int skip_block,
|
||||
const int16_t *zbin_ptr,
|
||||
const int16_t *round_ptr,
|
||||
const int16_t *quant_ptr,
|
||||
const int16_t *quant_shift_ptr,
|
||||
tran_low_t *qcoeff_ptr,
|
||||
tran_low_t *dqcoeff_ptr,
|
||||
const int16_t *dequant_ptr,
|
||||
int zbin_oq_value, uint16_t *eob_ptr,
|
||||
const int16_t *scan, const int16_t *iscan) {
|
||||
static INLINE void highbd_quantize_b_bigtx(const tran_low_t *coeff_ptr,
|
||||
intptr_t n_coeffs,
|
||||
int skip_block,
|
||||
const int16_t *zbin_ptr,
|
||||
const int16_t *round_ptr,
|
||||
const int16_t *quant_ptr,
|
||||
const int16_t *quant_shift_ptr,
|
||||
tran_low_t *qcoeff_ptr,
|
||||
tran_low_t *dqcoeff_ptr,
|
||||
const int16_t *dequant_ptr,
|
||||
int zbin_oq_value,
|
||||
uint16_t *eob_ptr,
|
||||
const int16_t *scan,
|
||||
const int16_t *iscan,
|
||||
int logsizeby32) {
|
||||
const int zbins[2] = { ROUND_POWER_OF_TWO(zbin_ptr[0] + zbin_oq_value, 1),
|
||||
ROUND_POWER_OF_TWO(zbin_ptr[1] + zbin_oq_value, 1) };
|
||||
const int nzbins[2] = { zbins[0] * -1, zbins[1] * -1 };
|
||||
|
||||
int idx = 0;
|
||||
int idx_arr[1024];
|
||||
int idx_arr[MAX_NUM_COEFS];
|
||||
int i, eob = -1;
|
||||
(void)iscan;
|
||||
|
||||
@ -504,14 +703,15 @@ void vp9_highbd_quantize_b_32x32_c(const tran_low_t *coeff_ptr,
|
||||
const int coeff = coeff_ptr[rc];
|
||||
const int coeff_sign = (coeff >> 31);
|
||||
const int abs_coeff = (coeff ^ coeff_sign) - coeff_sign;
|
||||
int64_t tmp = clamp(abs_coeff +
|
||||
ROUND_POWER_OF_TWO(round_ptr[rc != 0], 1),
|
||||
INT32_MIN, INT32_MAX);
|
||||
int64_t tmp = clamp(
|
||||
abs_coeff + ROUND_POWER_OF_TWO(round_ptr[rc != 0], (1 + logsizeby32)),
|
||||
INT32_MIN, INT32_MAX);
|
||||
tmp = ((((tmp * quant_ptr[rc != 0]) >> 16) + tmp) *
|
||||
quant_shift_ptr[rc != 0]) >> 15;
|
||||
quant_shift_ptr[rc != 0]) >> (15 - logsizeby32);
|
||||
|
||||
qcoeff_ptr[rc] = (tmp ^ coeff_sign) - coeff_sign;
|
||||
dqcoeff_ptr[rc] = qcoeff_ptr[rc] * dequant_ptr[rc != 0] / 2;
|
||||
dqcoeff_ptr[rc] = qcoeff_ptr[rc] * dequant_ptr[rc != 0] /
|
||||
(2 << logsizeby32);
|
||||
|
||||
if (tmp)
|
||||
eob = idx_arr[i];
|
||||
@ -519,7 +719,49 @@ void vp9_highbd_quantize_b_32x32_c(const tran_low_t *coeff_ptr,
|
||||
}
|
||||
*eob_ptr = eob + 1;
|
||||
}
|
||||
#endif
|
||||
|
||||
void vp9_highbd_quantize_b_32x32_c(const tran_low_t *coeff_ptr,
|
||||
intptr_t n_coeffs,
|
||||
int skip_block,
|
||||
const int16_t *zbin_ptr,
|
||||
const int16_t *round_ptr,
|
||||
const int16_t *quant_ptr,
|
||||
const int16_t *quant_shift_ptr,
|
||||
tran_low_t *qcoeff_ptr,
|
||||
tran_low_t *dqcoeff_ptr,
|
||||
const int16_t *dequant_ptr,
|
||||
int zbin_oq_value,
|
||||
uint16_t *eob_ptr,
|
||||
const int16_t *scan,
|
||||
const int16_t *iscan) {
|
||||
highbd_quantize_b_bigtx(coeff_ptr, n_coeffs, skip_block,
|
||||
zbin_ptr, round_ptr, quant_ptr, quant_shift_ptr,
|
||||
qcoeff_ptr, dqcoeff_ptr, dequant_ptr,
|
||||
zbin_oq_value, eob_ptr, scan, iscan, 0);
|
||||
}
|
||||
|
||||
#if CONFIG_TX64X64
|
||||
void vp9_highbd_quantize_b_64x64_c(const tran_low_t *coeff_ptr,
|
||||
intptr_t n_coeffs,
|
||||
int skip_block,
|
||||
const int16_t *zbin_ptr,
|
||||
const int16_t *round_ptr,
|
||||
const int16_t *quant_ptr,
|
||||
const int16_t *quant_shift_ptr,
|
||||
tran_low_t *qcoeff_ptr,
|
||||
tran_low_t *dqcoeff_ptr,
|
||||
const int16_t *dequant_ptr,
|
||||
int zbin_oq_value,
|
||||
uint16_t *eob_ptr,
|
||||
const int16_t *scan,
|
||||
const int16_t *iscan) {
|
||||
highbd_quantize_b_bigtx(coeff_ptr, n_coeffs, skip_block,
|
||||
zbin_ptr, round_ptr, quant_ptr, quant_shift_ptr,
|
||||
qcoeff_ptr, dqcoeff_ptr, dequant_ptr,
|
||||
zbin_oq_value, eob_ptr, scan, iscan, 1);
|
||||
}
|
||||
#endif // CONFIG_TX64X64
|
||||
#endif // CONFIG_VP9_HIGHBITDEPTH
|
||||
|
||||
void vp9_regular_quantize_b_4x4(MACROBLOCK *x, int plane, int block,
|
||||
const int16_t *scan, const int16_t *iscan) {
|
||||
@ -530,21 +772,21 @@ void vp9_regular_quantize_b_4x4(MACROBLOCK *x, int plane, int block,
|
||||
#if CONFIG_VP9_HIGHBITDEPTH
|
||||
if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
|
||||
vp9_highbd_quantize_b(BLOCK_OFFSET(p->coeff, block),
|
||||
16, x->skip_block,
|
||||
p->zbin, p->round, p->quant, p->quant_shift,
|
||||
BLOCK_OFFSET(p->qcoeff, block),
|
||||
BLOCK_OFFSET(pd->dqcoeff, block),
|
||||
pd->dequant, p->zbin_extra, &p->eobs[block],
|
||||
scan, iscan);
|
||||
16, x->skip_block,
|
||||
p->zbin, p->round, p->quant, p->quant_shift,
|
||||
BLOCK_OFFSET(p->qcoeff, block),
|
||||
BLOCK_OFFSET(pd->dqcoeff, block),
|
||||
pd->dequant, p->zbin_extra, &p->eobs[block],
|
||||
scan, iscan);
|
||||
return;
|
||||
}
|
||||
#endif
|
||||
#endif // CONFIG_VP9_HIGHBITDEPTH
|
||||
vp9_quantize_b(BLOCK_OFFSET(p->coeff, block),
|
||||
16, x->skip_block,
|
||||
p->zbin, p->round, p->quant, p->quant_shift,
|
||||
BLOCK_OFFSET(p->qcoeff, block),
|
||||
BLOCK_OFFSET(pd->dqcoeff, block),
|
||||
pd->dequant, p->zbin_extra, &p->eobs[block], scan, iscan);
|
||||
16, x->skip_block,
|
||||
p->zbin, p->round, p->quant, p->quant_shift,
|
||||
BLOCK_OFFSET(p->qcoeff, block),
|
||||
BLOCK_OFFSET(pd->dqcoeff, block),
|
||||
pd->dequant, p->zbin_extra, &p->eobs[block], scan, iscan);
|
||||
}
|
||||
|
||||
static void invert_quant(int16_t *quant, int16_t *shift, int d) {
|
||||
|
@ -45,6 +45,12 @@ void vp9_quantize_dc_32x32(const tran_low_t *coeff_ptr, int skip_block,
|
||||
const int16_t *round_ptr, const int16_t quant_ptr,
|
||||
tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr,
|
||||
const int16_t dequant_ptr, uint16_t *eob_ptr);
|
||||
#if CONFIG_TX64X64
|
||||
void vp9_quantize_dc_64x64(const tran_low_t *coeff_ptr, int skip_block,
|
||||
const int16_t *round_ptr, const int16_t quant_ptr,
|
||||
tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr,
|
||||
const int16_t dequant_ptr, uint16_t *eob_ptr);
|
||||
#endif // CONFIG_TX64X64
|
||||
void vp9_regular_quantize_b_4x4(MACROBLOCK *x, int plane, int block,
|
||||
const int16_t *scan, const int16_t *iscan);
|
||||
|
||||
@ -61,7 +67,17 @@ void vp9_highbd_quantize_dc_32x32(const tran_low_t *coeff_ptr,
|
||||
tran_low_t *dqcoeff_ptr,
|
||||
const int16_t dequant_ptr,
|
||||
uint16_t *eob_ptr);
|
||||
#endif
|
||||
#if CONFIG_TX64X64
|
||||
void vp9_highbd_quantize_dc_64x64(const tran_low_t *coeff_ptr,
|
||||
int skip_block,
|
||||
const int16_t *round_ptr,
|
||||
const int16_t quant_ptr,
|
||||
tran_low_t *qcoeff_ptr,
|
||||
tran_low_t *dqcoeff_ptr,
|
||||
const int16_t dequant_ptr,
|
||||
uint16_t *eob_ptr);
|
||||
#endif // CONFIG_TX64X64
|
||||
#endif // CONFIG_VP9_HIGHBITDEPTH
|
||||
|
||||
struct VP9_COMP;
|
||||
struct VP9Common;
|
||||
|
@ -76,7 +76,7 @@ static void fill_token_costs(vp9_coeff_cost *c,
|
||||
vp9_coeff_probs_model (*p)[PLANE_TYPES]) {
|
||||
int i, j, k, l;
|
||||
TX_SIZE t;
|
||||
for (t = TX_4X4; t <= TX_32X32; ++t)
|
||||
for (t = TX_4X4; t < TX_SIZES; ++t)
|
||||
for (i = 0; i < PLANE_TYPES; ++i)
|
||||
for (j = 0; j < REF_TYPES; ++j)
|
||||
for (k = 0; k < COEF_BANDS; ++k)
|
||||
@ -425,6 +425,14 @@ void vp9_get_entropy_contexts(BLOCK_SIZE bsize, TX_SIZE tx_size,
|
||||
for (i = 0; i < num_4x4_h; i += 8)
|
||||
t_left[i] = !!*(const uint64_t *)&left[i];
|
||||
break;
|
||||
#if CONFIG_TX64X64
|
||||
case TX_64X64:
|
||||
for (i = 0; i < num_4x4_w; i += 16)
|
||||
t_above[i] = !!*(const uint64_t *)&above[i];
|
||||
for (i = 0; i < num_4x4_h; i += 16)
|
||||
t_left[i] = !!*(const uint64_t *)&left[i];
|
||||
break;
|
||||
#endif
|
||||
default:
|
||||
assert(0 && "Invalid transform size.");
|
||||
break;
|
||||
|
@ -340,6 +340,9 @@ static const int16_t band_counts[TX_SIZES][8] = {
|
||||
{ 1, 2, 3, 4, 11, 64 - 21, 0 },
|
||||
{ 1, 2, 3, 4, 11, 256 - 21, 0 },
|
||||
{ 1, 2, 3, 4, 11, 1024 - 21, 0 },
|
||||
#if CONFIG_TX64X64
|
||||
{ 1, 2, 3, 4, 11, 4096 - 21, 0 },
|
||||
#endif
|
||||
};
|
||||
static INLINE int cost_coeffs(MACROBLOCK *x,
|
||||
int plane, int block,
|
||||
@ -357,7 +360,7 @@ static INLINE int cost_coeffs(MACROBLOCK *x,
|
||||
const tran_low_t *const qcoeff = BLOCK_OFFSET(p->qcoeff, block);
|
||||
unsigned int (*token_costs)[2][COEFF_CONTEXTS][ENTROPY_TOKENS] =
|
||||
x->token_costs[tx_size][type][is_inter_block(mbmi)];
|
||||
uint8_t token_cache[32 * 32];
|
||||
uint8_t token_cache[MAX_NUM_COEFS];
|
||||
int pt = combine_entropy_contexts(*A, *L);
|
||||
int c, cost;
|
||||
// Check for consistency of tx_size with mode info
|
||||
@ -416,6 +419,8 @@ static INLINE int cost_coeffs(MACROBLOCK *x,
|
||||
return cost;
|
||||
}
|
||||
|
||||
#define right_shift_signed(x, s) ((s) < 0 ? (x) << (-(s)) : (x) >> (s))
|
||||
|
||||
#if CONFIG_VP9_HIGHBITDEPTH
|
||||
static void dist_block(int plane, int block, TX_SIZE tx_size,
|
||||
struct rdcost_block_args* args, int bd) {
|
||||
@ -429,17 +434,23 @@ static void dist_block(int plane, int block, TX_SIZE tx_size,
|
||||
const struct macroblock_plane *const p = &x->plane[plane];
|
||||
const struct macroblockd_plane *const pd = &xd->plane[plane];
|
||||
int64_t this_sse;
|
||||
#if CONFIG_TX64X64
|
||||
int shift = (tx_size == TX_64X64 ? -2 : (tx_size == TX_32X32 ? 0 : 2));
|
||||
#else
|
||||
int shift = tx_size == TX_32X32 ? 0 : 2;
|
||||
#endif
|
||||
tran_low_t *const coeff = BLOCK_OFFSET(p->coeff, block);
|
||||
tran_low_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);
|
||||
|
||||
#if CONFIG_VP9_HIGHBITDEPTH
|
||||
args->dist = vp9_highbd_block_error(coeff, dqcoeff, 16 << ss_txfrm_size,
|
||||
&this_sse, bd) >> shift;
|
||||
args->dist = right_shift_signed(
|
||||
vp9_highbd_block_error(
|
||||
coeff, dqcoeff, 16 << ss_txfrm_size, &this_sse, bd), shift);
|
||||
#else
|
||||
args->dist = vp9_block_error(coeff, dqcoeff, 16 << ss_txfrm_size,
|
||||
&this_sse) >> shift;
|
||||
args->dist = right_shift_signed(
|
||||
vp9_block_error(coeff, dqcoeff, 16 << ss_txfrm_size, &this_sse), shift);
|
||||
#endif // CONFIG_VP9_HIGHBITDEPTH
|
||||
args->sse = this_sse >> shift;
|
||||
args->sse = right_shift_signed(this_sse, shift);
|
||||
|
||||
if (x->skip_encode && !is_inter_block(&xd->mi[0].src_mi->mbmi)) {
|
||||
// TODO(jingning): tune the model to better capture the distortion.
|
||||
@ -514,9 +525,12 @@ static void block_rd_txfm(int plane, int block, BLOCK_SIZE plane_bsize,
|
||||
#if CONFIG_VP9_HIGHBITDEPTH
|
||||
dc_correct >>= ((xd->bd - 8) * 2);
|
||||
#endif
|
||||
if (tx_size != TX_32X32)
|
||||
if (tx_size < TX_32X32)
|
||||
dc_correct >>= 2;
|
||||
|
||||
#if CONFIG_TX64X64
|
||||
else if (tx_size == TX_64X64)
|
||||
dc_correct <<= 2;
|
||||
#endif
|
||||
args->dist = MAX(0, args->sse - dc_correct);
|
||||
}
|
||||
} else {
|
||||
@ -629,10 +643,15 @@ static void choose_tx_size_from_rd(VP9_COMP *cpi, MACROBLOCK *x,
|
||||
vp9_prob skip_prob = vp9_get_skip_prob(cm, xd);
|
||||
int r[TX_SIZES][2], s[TX_SIZES];
|
||||
int64_t d[TX_SIZES], sse[TX_SIZES];
|
||||
int64_t rd[TX_SIZES][2] = {{INT64_MAX, INT64_MAX},
|
||||
{INT64_MAX, INT64_MAX},
|
||||
{INT64_MAX, INT64_MAX},
|
||||
{INT64_MAX, INT64_MAX}};
|
||||
int64_t rd[TX_SIZES][2] = {
|
||||
{INT64_MAX, INT64_MAX},
|
||||
{INT64_MAX, INT64_MAX},
|
||||
{INT64_MAX, INT64_MAX},
|
||||
{INT64_MAX, INT64_MAX},
|
||||
#if CONFIG_TX64X64
|
||||
{INT64_MAX, INT64_MAX},
|
||||
#endif
|
||||
};
|
||||
int n, m;
|
||||
int s0, s1;
|
||||
const TX_SIZE max_mode_tx_size = tx_mode_to_biggest_tx_size[cm->tx_mode];
|
||||
@ -681,7 +700,6 @@ static void choose_tx_size_from_rd(VP9_COMP *cpi, MACROBLOCK *x,
|
||||
mbmi->tx_size = cm->tx_mode == TX_MODE_SELECT ?
|
||||
best_tx : MIN(max_tx_size, max_mode_tx_size);
|
||||
|
||||
|
||||
*distortion = d[mbmi->tx_size];
|
||||
*rate = r[mbmi->tx_size][cm->tx_mode == TX_MODE_SELECT];
|
||||
*skip = s[mbmi->tx_size];
|
||||
@ -691,8 +709,14 @@ static void choose_tx_size_from_rd(VP9_COMP *cpi, MACROBLOCK *x,
|
||||
tx_cache[ALLOW_8X8] = rd[TX_8X8][0];
|
||||
tx_cache[ALLOW_16X16] = rd[MIN(max_tx_size, TX_16X16)][0];
|
||||
tx_cache[ALLOW_32X32] = rd[MIN(max_tx_size, TX_32X32)][0];
|
||||
#if CONFIG_TX64X64
|
||||
tx_cache[ALLOW_64X64] = rd[MIN(max_tx_size, TX_64X64)][0];
|
||||
#endif
|
||||
|
||||
if (max_tx_size == TX_32X32 && best_tx == TX_32X32) {
|
||||
#if CONFIG_TX64X64
|
||||
if (max_tx_size >= TX_64X64 && best_tx == TX_64X64) {
|
||||
tx_cache[TX_MODE_SELECT] = rd[TX_64X64][1];
|
||||
} else if (max_tx_size >= TX_32X32 && best_tx == TX_32X32) {
|
||||
tx_cache[TX_MODE_SELECT] = rd[TX_32X32][1];
|
||||
} else if (max_tx_size >= TX_16X16 && best_tx == TX_16X16) {
|
||||
tx_cache[TX_MODE_SELECT] = rd[TX_16X16][1];
|
||||
@ -701,6 +725,17 @@ static void choose_tx_size_from_rd(VP9_COMP *cpi, MACROBLOCK *x,
|
||||
} else {
|
||||
tx_cache[TX_MODE_SELECT] = rd[TX_4X4][1];
|
||||
}
|
||||
#else
|
||||
if (max_tx_size >= TX_32X32 && best_tx == TX_32X32) {
|
||||
tx_cache[TX_MODE_SELECT] = rd[TX_32X32][1];
|
||||
} else if (max_tx_size >= TX_16X16 && best_tx == TX_16X16) {
|
||||
tx_cache[TX_MODE_SELECT] = rd[TX_16X16][1];
|
||||
} else if (rd[TX_8X8][1] < rd[TX_4X4][1]) {
|
||||
tx_cache[TX_MODE_SELECT] = rd[TX_8X8][1];
|
||||
} else {
|
||||
tx_cache[TX_MODE_SELECT] = rd[TX_4X4][1];
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
static void super_block_yrd(VP9_COMP *cpi, MACROBLOCK *x, int *rate,
|
||||
@ -1970,12 +2005,13 @@ static void estimate_ref_frame_costs(const VP9_COMMON *cm,
|
||||
}
|
||||
}
|
||||
|
||||
static void store_coding_context(MACROBLOCK *x, PICK_MODE_CONTEXT *ctx,
|
||||
int mode_index,
|
||||
int64_t comp_pred_diff[REFERENCE_MODES],
|
||||
const int64_t tx_size_diff[TX_MODES],
|
||||
int64_t best_filter_diff[SWITCHABLE_FILTER_CONTEXTS],
|
||||
int skippable) {
|
||||
static void store_coding_context(
|
||||
MACROBLOCK *x, PICK_MODE_CONTEXT *ctx,
|
||||
int mode_index,
|
||||
int64_t comp_pred_diff[REFERENCE_MODES],
|
||||
const int64_t tx_size_diff[TX_MODES],
|
||||
int64_t best_filter_diff[SWITCHABLE_FILTER_CONTEXTS],
|
||||
int skippable) {
|
||||
MACROBLOCKD *const xd = &x->e_mbd;
|
||||
|
||||
// Take a snapshot of the coding context so it can be
|
||||
|
@ -48,6 +48,10 @@ static void set_good_speed_feature(VP9_COMP *cpi, VP9_COMMON *cm,
|
||||
sf->adaptive_pred_interp_filter = 1;
|
||||
|
||||
sf->recode_loop = ALLOW_RECODE_KFARFGF;
|
||||
#if CONFIG_TX64X64
|
||||
sf->intra_y_mode_mask[TX_64X64] = INTRA_DC_H_V;
|
||||
sf->intra_uv_mode_mask[TX_64X64] = INTRA_DC_H_V;
|
||||
#endif
|
||||
sf->intra_y_mode_mask[TX_32X32] = INTRA_DC_H_V;
|
||||
sf->intra_uv_mode_mask[TX_32X32] = INTRA_DC_H_V;
|
||||
sf->intra_y_mode_mask[TX_16X16] = INTRA_DC_H_V;
|
||||
@ -114,6 +118,10 @@ static void set_good_speed_feature(VP9_COMP *cpi, VP9_COMMON *cm,
|
||||
sf->recode_loop = ALLOW_RECODE_KFMAXBW;
|
||||
sf->adaptive_rd_thresh = 3;
|
||||
sf->mode_skip_start = 6;
|
||||
#if CONFIG_TX64X64
|
||||
sf->intra_y_mode_mask[TX_64X64] = INTRA_DC;
|
||||
sf->intra_uv_mode_mask[TX_64X64] = INTRA_DC;
|
||||
#endif
|
||||
sf->intra_y_mode_mask[TX_32X32] = INTRA_DC;
|
||||
sf->intra_uv_mode_mask[TX_32X32] = INTRA_DC;
|
||||
sf->adaptive_interp_filter_search = 1;
|
||||
@ -184,6 +192,10 @@ static void set_rt_speed_feature(VP9_COMP *cpi, SPEED_FEATURES *sf,
|
||||
sf->adaptive_pred_interp_filter = 1;
|
||||
sf->mv.auto_mv_step_size = 1;
|
||||
sf->adaptive_rd_thresh = 2;
|
||||
#if CONFIG_TX64X64
|
||||
sf->intra_y_mode_mask[TX_64X64] = INTRA_DC_H_V;
|
||||
sf->intra_uv_mode_mask[TX_64X64] = INTRA_DC_H_V;
|
||||
#endif
|
||||
sf->intra_y_mode_mask[TX_32X32] = INTRA_DC_H_V;
|
||||
sf->intra_uv_mode_mask[TX_32X32] = INTRA_DC_H_V;
|
||||
sf->intra_uv_mode_mask[TX_16X16] = INTRA_DC_H_V;
|
||||
@ -246,6 +258,9 @@ static void set_rt_speed_feature(VP9_COMP *cpi, SPEED_FEATURES *sf,
|
||||
sf->intra_uv_mode_mask[i] = INTRA_DC;
|
||||
}
|
||||
sf->intra_y_mode_mask[TX_32X32] = INTRA_DC;
|
||||
#if CONFIG_TX64X64
|
||||
sf->intra_y_mode_mask[TX_64X64] = INTRA_DC;
|
||||
#endif
|
||||
sf->frame_parameter_update = 0;
|
||||
sf->mv.search_method = FAST_HEX;
|
||||
|
||||
|
@ -296,7 +296,7 @@ static void tokenize_b(int plane, int block, BLOCK_SIZE plane_bsize,
|
||||
VP9_COMP *cpi = args->cpi;
|
||||
MACROBLOCKD *xd = args->xd;
|
||||
TOKENEXTRA **tp = args->tp;
|
||||
uint8_t token_cache[32 * 32];
|
||||
uint8_t token_cache[MAX_NUM_COEFS];
|
||||
struct macroblock_plane *p = &cpi->mb.plane[plane];
|
||||
struct macroblockd_plane *pd = &xd->plane[plane];
|
||||
MB_MODE_INFO *mbmi = &xd->mi[0].src_mi->mbmi;
|
||||
@ -374,7 +374,6 @@ static void tokenize_b(int plane, int block, BLOCK_SIZE plane_bsize,
|
||||
counts[band[c]][pt]);
|
||||
++eob_branch[band[c]][pt];
|
||||
}
|
||||
|
||||
*tp = t;
|
||||
|
||||
vp9_set_contexts(xd, pd, plane_bsize, tx_size, c > 0, aoff, loff);
|
||||
|
Loading…
x
Reference in New Issue
Block a user