Merge "Adding a 64x64 transform mode" into nextgen

Deb Mukherjee 2014-10-30 00:51:35 -07:00 committed by Gerrit Code Review
commit 8bdf4cebb9
28 changed files with 4380 additions and 191 deletions

configure

@ -282,6 +282,7 @@ EXPERIMENT_LIST="
vp9_temporal_denoising
fp_mb_stats
emulate_hardware
tx64x64
"
CONFIG_LIST="
external_build
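As with the other entries in EXPERIMENT_LIST, the new tx64x64 line makes the experiment selectable at build time — presumably via ./configure --enable-experimental --enable-tx64x64 — which defines the CONFIG_TX64X64 guard used throughout the hunks below.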


@ -101,22 +101,35 @@ const TX_SIZE max_txsize_lookup[BLOCK_SIZES] = {
TX_4X4, TX_4X4, TX_4X4,
TX_8X8, TX_8X8, TX_8X8,
TX_16X16, TX_16X16, TX_16X16,
TX_32X32, TX_32X32, TX_32X32, TX_32X32
TX_32X32, TX_32X32, TX_32X32,
#if CONFIG_TX64X64
TX_64X64,
#else
TX_32X32,
#endif
};
const BLOCK_SIZE txsize_to_bsize[TX_SIZES] = {
BLOCK_4X4, // TX_4X4
BLOCK_8X8, // TX_8X8
BLOCK_4X4, // TX_4X4
BLOCK_8X8, // TX_8X8
BLOCK_16X16, // TX_16X16
BLOCK_32X32, // TX_32X32
#if CONFIG_TX64X64
BLOCK_32X32, // TX_64X64
#endif
};
const TX_SIZE tx_mode_to_biggest_tx_size[TX_MODES] = {
TX_4X4, // ONLY_4X4
TX_8X8, // ALLOW_8X8
TX_4X4, // ONLY_4X4
TX_8X8, // ALLOW_8X8
TX_16X16, // ALLOW_16X16
TX_32X32, // ALLOW_32X32
#if CONFIG_TX64X64
TX_64X64, // ALLOW_64X64
TX_64X64, // TX_MODE_SELECT
#else
TX_32X32, // TX_MODE_SELECT
#endif
};
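For context, a minimal sketch (an illustration, not code from this commit) of how these two lookup tables are typically combined when bounding a block's transform size:

// Illustrative sketch only: the effective upper bound on a block's transform
// size is the smaller of what the block size permits and what the frame-level
// tx_mode permits. With CONFIG_TX64X64, both tables can now return TX_64X64.
static TX_SIZE biggest_tx_size_for_block(BLOCK_SIZE bsize, TX_MODE tx_mode) {
  const TX_SIZE by_bsize = max_txsize_lookup[bsize];
  const TX_SIZE by_mode = tx_mode_to_biggest_tx_size[tx_mode];
  return by_bsize < by_mode ? by_bsize : by_mode;
}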
const BLOCK_SIZE ss_size_lookup[BLOCK_SIZES][2][2] = {


@ -44,7 +44,7 @@ const vp9_prob vp9_cat6_prob_high12[] = {
};
#endif
const uint8_t vp9_coefband_trans_8x8plus[1024] = {
const uint8_t vp9_coefband_trans_8x8plus[MAX_NUM_COEFS] = {
0, 1, 1, 2, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4,
4, 4, 4, 4, 4, 5,
// beyond MAXBAND_INDEX+1 all values are filled as 5
@ -111,6 +111,200 @@ const uint8_t vp9_coefband_trans_8x8plus[1024] = {
5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
#if CONFIG_TX64X64
5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
/* ... 190 further identical rows elided here: entries 1024..4095 of the
   enlarged table are all 5, per the comment above ... */
5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
#endif
};
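Since every index past MAXBAND_INDEX maps to band 5, the enlarged table is fully determined by its first 22 entries. A hypothetical init-time generator (shown only to make the structure explicit; the commit keeps the static table):

// Hypothetical equivalent of the static table above, for illustration only:
// indices 0..MAXBAND_INDEX use the hand-written prefix, everything else is 5.
static void fill_coefband_trans_8x8plus(uint8_t *t) {
  static const uint8_t head[MAXBAND_INDEX + 1] = {
    0, 1, 1, 2, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 5
  };
  int i;
  for (i = 0; i < MAX_NUM_COEFS; ++i)
    t[i] = (i <= MAXBAND_INDEX) ? head[i] : 5;
}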
const uint8_t vp9_coefband_trans_4x4[16] = {
@ -736,6 +930,92 @@ static const vp9_coeff_probs_model default_coef_probs_32x32[PLANE_TYPES] = {
}
};
#if CONFIG_TX64X64
static const vp9_coeff_probs_model default_coef_probs_64x64[PLANE_TYPES] = {
{ // Y plane
{ // Intra
{ // Band 0
{ 17, 38, 140 }, { 7, 34, 80 }, { 1, 17, 29 }
}, { // Band 1
{ 37, 75, 128 }, { 41, 76, 128 }, { 26, 66, 116 },
{ 12, 52, 94 }, { 2, 32, 55 }, { 1, 10, 16 }
}, { // Band 2
{ 50, 127, 154 }, { 37, 109, 152 }, { 16, 82, 121 },
{ 5, 59, 85 }, { 1, 35, 54 }, { 1, 13, 20 }
}, { // Band 3
{ 40, 142, 167 }, { 17, 110, 157 }, { 2, 71, 112 },
{ 1, 44, 72 }, { 1, 27, 45 }, { 1, 11, 17 }
}, { // Band 4
{ 30, 175, 188 }, { 9, 124, 169 }, { 1, 74, 116 },
{ 1, 48, 78 }, { 1, 30, 49 }, { 1, 11, 18 }
}, { // Band 5
{ 10, 222, 223 }, { 2, 150, 194 }, { 1, 83, 128 },
{ 1, 48, 79 }, { 1, 27, 45 }, { 1, 11, 17 }
}
}, { // Inter
{ // Band 0
{ 36, 41, 235 }, { 29, 36, 193 }, { 10, 27, 111 }
}, { // Band 1
{ 85, 165, 222 }, { 177, 162, 215 }, { 110, 135, 195 },
{ 57, 113, 168 }, { 23, 83, 120 }, { 10, 49, 61 }
}, { // Band 2
{ 85, 190, 223 }, { 36, 139, 200 }, { 5, 90, 146 },
{ 1, 60, 103 }, { 1, 38, 65 }, { 1, 18, 30 }
}, { // Band 3
{ 72, 202, 223 }, { 23, 141, 199 }, { 2, 86, 140 },
{ 1, 56, 97 }, { 1, 36, 61 }, { 1, 16, 27 }
}, { // Band 4
{ 55, 218, 225 }, { 13, 145, 200 }, { 1, 86, 141 },
{ 1, 57, 99 }, { 1, 35, 61 }, { 1, 13, 22 }
}, { // Band 5
{ 15, 235, 212 }, { 1, 132, 184 }, { 1, 84, 139 },
{ 1, 57, 97 }, { 1, 34, 56 }, { 1, 14, 23 }
}
}
}, { // UV plane
{ // Intra
{ // Band 0
{ 181, 21, 201 }, { 61, 37, 123 }, { 10, 38, 71 }
}, { // Band 1
{ 47, 106, 172 }, { 95, 104, 173 }, { 42, 93, 159 },
{ 18, 77, 131 }, { 4, 50, 81 }, { 1, 17, 23 }
}, { // Band 2
{ 62, 147, 199 }, { 44, 130, 189 }, { 28, 102, 154 },
{ 18, 75, 115 }, { 2, 44, 65 }, { 1, 12, 19 }
}, { // Band 3
{ 55, 153, 210 }, { 24, 130, 194 }, { 3, 93, 146 },
{ 1, 61, 97 }, { 1, 31, 50 }, { 1, 10, 16 }
}, { // Band 4
{ 49, 186, 223 }, { 17, 148, 204 }, { 1, 96, 142 },
{ 1, 53, 83 }, { 1, 26, 44 }, { 1, 11, 17 }
}, { // Band 5
{ 13, 217, 212 }, { 2, 136, 180 }, { 1, 78, 124 },
{ 1, 50, 83 }, { 1, 29, 49 }, { 1, 14, 23 }
}
}, { // Inter
{ // Band 0
{ 197, 13, 247 }, { 82, 17, 222 }, { 25, 17, 162 }
}, { // Band 1
{ 126, 186, 247 }, { 234, 191, 243 }, { 176, 177, 234 },
{ 104, 158, 220 }, { 66, 128, 186 }, { 55, 90, 137 }
}, { // Band 2
{ 111, 197, 242 }, { 46, 158, 219 }, { 9, 104, 171 },
{ 2, 65, 125 }, { 1, 44, 80 }, { 1, 17, 91 }
}, { // Band 3
{ 104, 208, 245 }, { 39, 168, 224 }, { 3, 109, 162 },
{ 1, 79, 124 }, { 1, 50, 102 }, { 1, 43, 102 }
}, { // Band 4
{ 84, 220, 246 }, { 31, 177, 231 }, { 2, 115, 180 },
{ 1, 79, 134 }, { 1, 55, 77 }, { 1, 60, 79 }
}, { // Band 5
{ 43, 243, 240 }, { 8, 180, 217 }, { 1, 115, 166 },
{ 1, 84, 121 }, { 1, 51, 67 }, { 1, 16, 6 }
}
}
}
};
#endif // CONFIG_TX64X64
static void extend_to_full_distribution(vp9_prob *probs, vp9_prob p) {
vpx_memcpy(probs, vp9_pareto8_full[p == 0 ? 0 : p - 1],
MODEL_NODES * sizeof(vp9_prob));
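For orientation (an inference from the surrounding code, not commit text): each { a, b, c } triple in the tables above stores only the three unconstrained node probabilities of the coefficient model; extend_to_full_distribution() fills the remaining MODEL_NODES of the token tree from the precomputed vp9_pareto8_full rows, indexed by the pivot probability.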
@ -752,6 +1032,9 @@ void vp9_default_coef_probs(VP9_COMMON *cm) {
vp9_copy(cm->fc.coef_probs[TX_8X8], default_coef_probs_8x8);
vp9_copy(cm->fc.coef_probs[TX_16X16], default_coef_probs_16x16);
vp9_copy(cm->fc.coef_probs[TX_32X32], default_coef_probs_32x32);
#if CONFIG_TX64X64
vp9_copy(cm->fc.coef_probs[TX_64X64], default_coef_probs_64x64);
#endif
}
#define COEF_COUNT_SAT 24
@ -806,6 +1089,6 @@ void vp9_adapt_coef_probs(VP9_COMMON *cm) {
update_factor = COEF_MAX_UPDATE_FACTOR;
count_sat = COEF_COUNT_SAT;
}
for (t = TX_4X4; t <= TX_32X32; t++)
for (t = TX_4X4; t < TX_SIZES; t++)
adapt_coef_probs(cm, t, count_sat, update_factor);
}


@ -90,10 +90,20 @@ extern const vp9_extra_bit vp9_extra_bits_high10[ENTROPY_TOKENS];
extern const vp9_extra_bit vp9_extra_bits_high12[ENTROPY_TOKENS];
#endif // CONFIG_VP9_HIGHBITDEPTH
#if CONFIG_TX64X64
#define DCT_MAX_VALUE 32768
#else
#define DCT_MAX_VALUE 16384
#endif // CONFIG_TX64X64
#if CONFIG_VP9_HIGHBITDEPTH
#if CONFIG_TX64X64
#define DCT_MAX_VALUE_HIGH10 131072
#define DCT_MAX_VALUE_HIGH12 524288
#else
#define DCT_MAX_VALUE_HIGH10 65536
#define DCT_MAX_VALUE_HIGH12 262144
#endif // CONFIG_TX64X64
#endif // CONFIG_VP9_HIGHBITDEPTH
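Arithmetic check on the new bounds (editorial note, not commit text): each constant exactly doubles its 32x32-max counterpart — 16384 -> 32768, 65536 -> 131072, 262144 -> 524288 — matching the one extra bit of dynamic range a 64-point transform stage can accumulate, while the 10- and 12-bit variants keep their 4x-per-two-extra-bits spacing (32768 * 4 = 131072, 131072 * 4 = 524288).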
/* Coefficients are predicted via a 3-dimensional probability table. */
@ -153,7 +163,14 @@ static INLINE void reset_skip_context(MACROBLOCKD *xd, BLOCK_SIZE bsize) {
// This macro is currently unused but may be used by certain implementations
#define MAXBAND_INDEX 21
DECLARE_ALIGNED(16, extern const uint8_t, vp9_coefband_trans_8x8plus[1024]);
#if CONFIG_TX64X64
#define MAX_NUM_COEFS 4096
#else
#define MAX_NUM_COEFS 1024
#endif
DECLARE_ALIGNED(16, extern const uint8_t,
vp9_coefband_trans_8x8plus[MAX_NUM_COEFS]);
DECLARE_ALIGNED(16, extern const uint8_t, vp9_coefband_trans_4x4[16]);
static INLINE const uint8_t *get_band_translate(TX_SIZE tx_size) {
@ -204,6 +221,12 @@ static INLINE int get_entropy_context(TX_SIZE tx_size, const ENTROPY_CONTEXT *a,
above_ec = !!*(const uint64_t *)a;
left_ec = !!*(const uint64_t *)l;
break;
#if CONFIG_TX64X64
case TX_64X64:
above_ec = !!*(const uint64_t *)a;
left_ec = !!*(const uint64_t *)l;
break;
#endif
default:
assert(0 && "Invalid transform size.");
break;


@ -229,7 +229,7 @@ const vp9_tree_index vp9_intra_mode_tree[TREE_SIZE(INTRA_MODES)] = {
-D135_PRED, -D117_PRED, /* 5 = D135_NODE */
-D45_PRED, 14, /* 6 = D45_NODE */
-D63_PRED, 16, /* 7 = D63_NODE */
-D153_PRED, -D207_PRED /* 8 = D153_NODE */
-D153_PRED, -D207_PRED /* 8 = D153_NODE */
};
const vp9_tree_index vp9_inter_mode_tree[TREE_SIZE(INTER_MODES)] = {
@ -265,6 +265,11 @@ static const vp9_prob default_single_ref_p[REF_CONTEXTS][2] = {
};
static const struct tx_probs default_tx_probs = {
#if CONFIG_TX64X64
{ { 3, 3, 136, 37 },
{ 3, 5, 52, 13 } },
#endif
{ { 3, 136, 37 },
{ 5, 52, 13 } },
@ -275,6 +280,26 @@ static const struct tx_probs default_tx_probs = {
{ 66 } }
};
#if CONFIG_TX64X64
void tx_counts_to_branch_counts_64x64(const unsigned int *tx_count_64x64p,
unsigned int (*ct_64x64p)[2]) {
ct_64x64p[0][0] = tx_count_64x64p[TX_4X4];
ct_64x64p[0][1] = tx_count_64x64p[TX_8X8] +
tx_count_64x64p[TX_16X16] +
tx_count_64x64p[TX_32X32] +
tx_count_64x64p[TX_64X64];
ct_64x64p[1][0] = tx_count_64x64p[TX_8X8];
ct_64x64p[1][1] = tx_count_64x64p[TX_16X16] +
tx_count_64x64p[TX_32X32] +
tx_count_64x64p[TX_64X64];
ct_64x64p[2][0] = tx_count_64x64p[TX_16X16];
ct_64x64p[2][1] = tx_count_64x64p[TX_32X32] +
tx_count_64x64p[TX_64X64];
ct_64x64p[3][0] = tx_count_64x64p[TX_32X32];
ct_64x64p[3][1] = tx_count_64x64p[TX_64X64];
}
#endif
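The four branch counts correspond to a unary-style tree over the five transform sizes. A hedged sketch of the matching read side (modeled on the existing read_selected_tx_size() pattern; the commit's actual decode-side change is not shown in full here, so treat this as an assumption):

// Hedged sketch, not quoted from the commit: one binary decision per branch,
// with probs[] being a p64x64[ctx] row from struct tx_probs.
static TX_SIZE read_selected_tx_size_upto_64x64(vp9_reader *r,
                                                const vp9_prob probs[4]) {
  int tx_size = vp9_read(r, probs[0]);     // 0: TX_4X4, 1: something larger
  if (tx_size != TX_4X4) {
    tx_size += vp9_read(r, probs[1]);      // stop at TX_8X8?
    if (tx_size != TX_8X8) {
      tx_size += vp9_read(r, probs[2]);    // stop at TX_16X16?
      if (tx_size != TX_16X16)
        tx_size += vp9_read(r, probs[3]);  // TX_32X32 vs TX_64X64
    }
  }
  return (TX_SIZE)tx_size;
}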
void tx_counts_to_branch_counts_32x32(const unsigned int *tx_count_32x32p,
unsigned int (*ct_32x32p)[2]) {
ct_32x32p[0][0] = tx_count_32x32p[TX_4X4];
@ -392,25 +417,34 @@ void vp9_adapt_mode_probs(VP9_COMMON *cm) {
if (cm->tx_mode == TX_MODE_SELECT) {
int j;
unsigned int branch_ct_8x8p[TX_SIZES - 3][2];
unsigned int branch_ct_16x16p[TX_SIZES - 2][2];
unsigned int branch_ct_32x32p[TX_SIZES - 1][2];
unsigned int branch_ct_8x8p[1][2];
unsigned int branch_ct_16x16p[2][2];
unsigned int branch_ct_32x32p[3][2];
#if CONFIG_TX64X64
unsigned int branch_ct_64x64p[4][2];
#endif
for (i = 0; i < TX_SIZE_CONTEXTS; ++i) {
tx_counts_to_branch_counts_8x8(counts->tx.p8x8[i], branch_ct_8x8p);
for (j = 0; j < TX_SIZES - 3; ++j)
for (j = 0; j < 1; ++j)
fc->tx_probs.p8x8[i][j] = adapt_prob(pre_fc->tx_probs.p8x8[i][j],
branch_ct_8x8p[j]);
tx_counts_to_branch_counts_16x16(counts->tx.p16x16[i], branch_ct_16x16p);
for (j = 0; j < TX_SIZES - 2; ++j)
for (j = 0; j < 2; ++j)
fc->tx_probs.p16x16[i][j] = adapt_prob(pre_fc->tx_probs.p16x16[i][j],
branch_ct_16x16p[j]);
tx_counts_to_branch_counts_32x32(counts->tx.p32x32[i], branch_ct_32x32p);
for (j = 0; j < TX_SIZES - 1; ++j)
for (j = 0; j < 3; ++j)
fc->tx_probs.p32x32[i][j] = adapt_prob(pre_fc->tx_probs.p32x32[i][j],
branch_ct_32x32p[j]);
#if CONFIG_TX64X64
tx_counts_to_branch_counts_64x64(counts->tx.p64x64[i], branch_ct_64x64p);
for (j = 0; j < 4; ++j)
fc->tx_probs.p64x64[i][j] = adapt_prob(pre_fc->tx_probs.p64x64[i][j],
branch_ct_64x64p[j]);
#endif
}
}


@ -24,15 +24,21 @@ extern "C" {
struct VP9Common;
struct tx_probs {
vp9_prob p32x32[TX_SIZE_CONTEXTS][TX_SIZES - 1];
vp9_prob p16x16[TX_SIZE_CONTEXTS][TX_SIZES - 2];
vp9_prob p8x8[TX_SIZE_CONTEXTS][TX_SIZES - 3];
#if CONFIG_TX64X64
vp9_prob p64x64[TX_SIZE_CONTEXTS][4];
#endif
vp9_prob p32x32[TX_SIZE_CONTEXTS][3];
vp9_prob p16x16[TX_SIZE_CONTEXTS][2];
vp9_prob p8x8[TX_SIZE_CONTEXTS][1];
};
struct tx_counts {
unsigned int p32x32[TX_SIZE_CONTEXTS][TX_SIZES];
unsigned int p16x16[TX_SIZE_CONTEXTS][TX_SIZES - 1];
unsigned int p8x8[TX_SIZE_CONTEXTS][TX_SIZES - 2];
#if CONFIG_TX64X64
unsigned int p64x64[TX_SIZE_CONTEXTS][5];
#endif
unsigned int p32x32[TX_SIZE_CONTEXTS][4];
unsigned int p16x16[TX_SIZE_CONTEXTS][3];
unsigned int p8x8[TX_SIZE_CONTEXTS][2];
};
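Note the deliberate off-by-one between the two structs: an n-way size decision needs n - 1 binary probabilities but n count buckets, so p64x64 pairs 4 probs with 5 counts, p32x32 pairs 3 with 4, and so on down to p8x8.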
typedef struct frame_contexts {
@ -88,6 +94,10 @@ void vp9_init_mode_probs(FRAME_CONTEXT *fc);
void vp9_adapt_mode_probs(struct VP9Common *cm);
#if CONFIG_TX64X64
void tx_counts_to_branch_counts_64x64(const unsigned int *tx_count_64x64p,
unsigned int (*ct_64x64p)[2]);
#endif
void tx_counts_to_branch_counts_32x32(const unsigned int *tx_count_32x32p,
unsigned int (*ct_32x32p)[2]);
void tx_counts_to_branch_counts_16x16(const unsigned int *tx_count_16x16p,


@ -77,6 +77,9 @@ typedef enum {
TX_8X8 = 1, // 8x8 transform
TX_16X16 = 2, // 16x16 transform
TX_32X32 = 3, // 32x32 transform
#if CONFIG_TX64X64
TX_64X64 = 4, // 64x64 transform
#endif
TX_SIZES
} TX_SIZE;
@ -86,8 +89,11 @@ typedef enum {
ALLOW_8X8 = 1, // allow block transform size up to 8x8
ALLOW_16X16 = 2, // allow block transform size up to 16x16
ALLOW_32X32 = 3, // allow block transform size up to 32x32
TX_MODE_SELECT = 4, // transform specified for each block
TX_MODES = 5,
#if CONFIG_TX64X64
ALLOW_64X64 = 4, // allow block transform size up to 64x64
#endif
TX_MODE_SELECT, // transform specified for each block
TX_MODES,
} TX_MODE;
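With the experiment off, the enum values are unchanged (TX_MODE_SELECT = 4, TX_MODES = 5); dropping the explicit initializers simply lets ALLOW_64X64 slot in at 4 and TX_MODE_SELECT/TX_MODES float to 5/6 when CONFIG_TX64X64 is defined.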
typedef enum {


@ -1457,6 +1457,458 @@ void vp9_iht16x16_add(TX_TYPE tx_type, const tran_low_t *input, uint8_t *dest,
}
}
#if CONFIG_TX64X64
#define DownshiftMultiplyBy2(x) ((x) * 2)
#define DownshiftMultiply(x) (x)
static void idct16f(double *input, double *output, int stride) {
static const double C1 = 0.995184726672197;
static const double C2 = 0.98078528040323;
static const double C3 = 0.956940335732209;
static const double C4 = 0.923879532511287;
static const double C5 = 0.881921264348355;
static const double C6 = 0.831469612302545;
static const double C7 = 0.773010453362737;
static const double C8 = 0.707106781186548;
static const double C9 = 0.634393284163646;
static const double C10 = 0.555570233019602;
static const double C11 = 0.471396736825998;
static const double C12 = 0.38268343236509;
static const double C13 = 0.290284677254462;
static const double C14 = 0.195090322016128;
static const double C15 = 0.098017140329561;
double step[16];
double intermediate[16];
double temp1, temp2;
// step 1 and 2
step[ 0] = input[stride*0] + input[stride*8];
step[ 1] = input[stride*0] - input[stride*8];
temp1 = input[stride*4]*C12;
temp2 = input[stride*12]*C4;
temp1 -= temp2;
temp1 = DownshiftMultiply(temp1);
temp1 *= C8;
step[ 2] = DownshiftMultiplyBy2(temp1);
temp1 = input[stride*4]*C4;
temp2 = input[stride*12]*C12;
temp1 += temp2;
temp1 = DownshiftMultiply(temp1);
temp1 *= C8;
step[ 3] = DownshiftMultiplyBy2(temp1);
temp1 = input[stride*2]*C8;
temp1 = DownshiftMultiplyBy2(temp1);
temp2 = input[stride*6] + input[stride*10];
step[ 4] = temp1 + temp2;
step[ 5] = temp1 - temp2;
temp1 = input[stride*14]*C8;
temp1 = DownshiftMultiplyBy2(temp1);
temp2 = input[stride*6] - input[stride*10];
step[ 6] = temp2 - temp1;
step[ 7] = temp2 + temp1;
// for odd input
temp1 = input[stride*3]*C12;
temp2 = input[stride*13]*C4;
temp1 += temp2;
temp1 = DownshiftMultiply(temp1);
temp1 *= C8;
intermediate[ 8] = DownshiftMultiplyBy2(temp1);
temp1 = input[stride*3]*C4;
temp2 = input[stride*13]*C12;
temp2 -= temp1;
temp2 = DownshiftMultiply(temp2);
temp2 *= C8;
intermediate[ 9] = DownshiftMultiplyBy2(temp2);
intermediate[10] = DownshiftMultiplyBy2(input[stride*9]*C8);
intermediate[11] = input[stride*15] - input[stride*1];
intermediate[12] = input[stride*15] + input[stride*1];
intermediate[13] = DownshiftMultiplyBy2((input[stride*7]*C8));
temp1 = input[stride*11]*C12;
temp2 = input[stride*5]*C4;
temp2 -= temp1;
temp2 = DownshiftMultiply(temp2);
temp2 *= C8;
intermediate[14] = DownshiftMultiplyBy2(temp2);
temp1 = input[stride*11]*C4;
temp2 = input[stride*5]*C12;
temp1 += temp2;
temp1 = DownshiftMultiply(temp1);
temp1 *= C8;
intermediate[15] = DownshiftMultiplyBy2(temp1);
step[ 8] = intermediate[ 8] + intermediate[14];
step[ 9] = intermediate[ 9] + intermediate[15];
step[10] = intermediate[10] + intermediate[11];
step[11] = intermediate[10] - intermediate[11];
step[12] = intermediate[12] + intermediate[13];
step[13] = intermediate[12] - intermediate[13];
step[14] = intermediate[ 8] - intermediate[14];
step[15] = intermediate[ 9] - intermediate[15];
// step 3
output[stride*0] = step[ 0] + step[ 3];
output[stride*1] = step[ 1] + step[ 2];
output[stride*2] = step[ 1] - step[ 2];
output[stride*3] = step[ 0] - step[ 3];
temp1 = step[ 4]*C14;
temp2 = step[ 7]*C2;
temp1 -= temp2;
output[stride*4] = DownshiftMultiply(temp1);
temp1 = step[ 4]*C2;
temp2 = step[ 7]*C14;
temp1 += temp2;
output[stride*7] = DownshiftMultiply(temp1);
temp1 = step[ 5]*C10;
temp2 = step[ 6]*C6;
temp1 -= temp2;
output[stride*5] = DownshiftMultiply(temp1);
temp1 = step[ 5]*C6;
temp2 = step[ 6]*C10;
temp1 += temp2;
output[stride*6] = DownshiftMultiply(temp1);
output[stride*8] = step[ 8] + step[11];
output[stride*9] = step[ 9] + step[10];
output[stride*10] = step[ 9] - step[10];
output[stride*11] = step[ 8] - step[11];
output[stride*12] = step[12] + step[15];
output[stride*13] = step[13] + step[14];
output[stride*14] = step[13] - step[14];
output[stride*15] = step[12] - step[15];
// output 4
step[ 0] = output[stride*0] + output[stride*7];
step[ 1] = output[stride*1] + output[stride*6];
step[ 2] = output[stride*2] + output[stride*5];
step[ 3] = output[stride*3] + output[stride*4];
step[ 4] = output[stride*3] - output[stride*4];
step[ 5] = output[stride*2] - output[stride*5];
step[ 6] = output[stride*1] - output[stride*6];
step[ 7] = output[stride*0] - output[stride*7];
temp1 = output[stride*8]*C7;
temp2 = output[stride*15]*C9;
temp1 -= temp2;
step[ 8] = DownshiftMultiply(temp1);
temp1 = output[stride*9]*C11;
temp2 = output[stride*14]*C5;
temp1 += temp2;
step[ 9] = DownshiftMultiply(temp1);
temp1 = output[stride*10]*C3;
temp2 = output[stride*13]*C13;
temp1 -= temp2;
step[10] = DownshiftMultiply(temp1);
temp1 = output[stride*11]*C15;
temp2 = output[stride*12]*C1;
temp1 += temp2;
step[11] = DownshiftMultiply(temp1);
temp1 = output[stride*11]*C1;
temp2 = output[stride*12]*C15;
temp2 -= temp1;
step[12] = DownshiftMultiply(temp2);
temp1 = output[stride*10]*C13;
temp2 = output[stride*13]*C3;
temp1 += temp2;
step[13] = DownshiftMultiply(temp1);
temp1 = output[stride*9]*C5;
temp2 = output[stride*14]*C11;
temp2 -= temp1;
step[14] = DownshiftMultiply(temp2);
temp1 = output[stride*8]*C9;
temp2 = output[stride*15]*C7;
temp1 += temp2;
step[15] = DownshiftMultiply(temp1);
// step 5
output[stride*0] = step[0] + step[15];
output[stride*1] = step[1] + step[14];
output[stride*2] = step[2] + step[13];
output[stride*3] = step[3] + step[12];
output[stride*4] = step[4] + step[11];
output[stride*5] = step[5] + step[10];
output[stride*6] = step[6] + step[ 9];
output[stride*7] = step[7] + step[ 8];
output[stride*15] = step[0] - step[15];
output[stride*14] = step[1] - step[14];
output[stride*13] = step[2] - step[13];
output[stride*12] = step[3] - step[12];
output[stride*11] = step[4] - step[11];
output[stride*10] = step[5] - step[10];
output[stride*9] = step[6] - step[ 9];
output[stride*8] = step[7] - step[ 8];
}
static void butterfly_32_idct_1d(double *input, double *output, int stride) {
static const double C1 = 0.998795456205; // cos(pi * 1 / 64)
static const double C3 = 0.989176509965; // cos(pi * 3 / 64)
static const double C5 = 0.970031253195; // cos(pi * 5 / 64)
static const double C7 = 0.941544065183; // cos(pi * 7 / 64)
static const double C9 = 0.903989293123; // cos(pi * 9 / 64)
static const double C11 = 0.857728610000; // cos(pi * 11 / 64)
static const double C13 = 0.803207531481; // cos(pi * 13 / 64)
static const double C15 = 0.740951125355; // cos(pi * 15 / 64)
static const double C16 = 0.707106781187; // cos(pi * 16 / 64)
static const double C17 = 0.671558954847; // cos(pi * 17 / 64)
static const double C19 = 0.595699304492; // cos(pi * 19 / 64)
static const double C21 = 0.514102744193; // cos(pi * 21 / 64)
static const double C23 = 0.427555093430; // cos(pi * 23 / 64)
static const double C25 = 0.336889853392; // cos(pi * 25 / 64)
static const double C27 = 0.242980179903; // cos(pi * 27 / 64)
static const double C29 = 0.146730474455; // cos(pi * 29 / 64)
static const double C31 = 0.049067674327; // cos(pi * 31 / 64)
double step1[32];
double step2[32];
step1[ 0] = input[stride*0];
step1[ 1] = input[stride*2];
step1[ 2] = input[stride*4];
step1[ 3] = input[stride*6];
step1[ 4] = input[stride*8];
step1[ 5] = input[stride*10];
step1[ 6] = input[stride*12];
step1[ 7] = input[stride*14];
step1[ 8] = input[stride*16];
step1[ 9] = input[stride*18];
step1[10] = input[stride*20];
step1[11] = input[stride*22];
step1[12] = input[stride*24];
step1[13] = input[stride*26];
step1[14] = input[stride*28];
step1[15] = input[stride*30];
step1[16] = DownshiftMultiplyBy2(input[stride*1]*C16);
step1[17] = (input[stride*3] + input[stride*1]);
step1[18] = (input[stride*5] + input[stride*3]);
step1[19] = (input[stride*7] + input[stride*5]);
step1[20] = (input[stride*9] + input[stride*7]);
step1[21] = (input[stride*11] + input[stride*9]);
step1[22] = (input[stride*13] + input[stride*11]);
step1[23] = (input[stride*15] + input[stride*13]);
step1[24] = (input[stride*17] + input[stride*15]);
step1[25] = (input[stride*19] + input[stride*17]);
step1[26] = (input[stride*21] + input[stride*19]);
step1[27] = (input[stride*23] + input[stride*21]);
step1[28] = (input[stride*25] + input[stride*23]);
step1[29] = (input[stride*27] + input[stride*25]);
step1[30] = (input[stride*29] + input[stride*27]);
step1[31] = (input[stride*31] + input[stride*29]);
idct16f(step1, step2, 1);
idct16f(step1 + 16, step2 + 16, 1);
step2[16] = DownshiftMultiply(step2[16] / (2*C1));
step2[17] = DownshiftMultiply(step2[17] / (2*C3));
step2[18] = DownshiftMultiply(step2[18] / (2*C5));
step2[19] = DownshiftMultiply(step2[19] / (2*C7));
step2[20] = DownshiftMultiply(step2[20] / (2*C9));
step2[21] = DownshiftMultiply(step2[21] / (2*C11));
step2[22] = DownshiftMultiply(step2[22] / (2*C13));
step2[23] = DownshiftMultiply(step2[23] / (2*C15));
step2[24] = DownshiftMultiply(step2[24] / (2*C17));
step2[25] = DownshiftMultiply(step2[25] / (2*C19));
step2[26] = DownshiftMultiply(step2[26] / (2*C21));
step2[27] = DownshiftMultiply(step2[27] / (2*C23));
step2[28] = DownshiftMultiply(step2[28] / (2*C25));
step2[29] = DownshiftMultiply(step2[29] / (2*C27));
step2[30] = DownshiftMultiply(step2[30] / (2*C29));
step2[31] = DownshiftMultiply(step2[31] / (2*C31));
output[stride* 0] = step2[ 0] + step2[16];
output[stride* 1] = step2[ 1] + step2[17];
output[stride* 2] = step2[ 2] + step2[18];
output[stride* 3] = step2[ 3] + step2[19];
output[stride* 4] = step2[ 4] + step2[20];
output[stride* 5] = step2[ 5] + step2[21];
output[stride* 6] = step2[ 6] + step2[22];
output[stride* 7] = step2[ 7] + step2[23];
output[stride* 8] = step2[ 8] + step2[24];
output[stride* 9] = step2[ 9] + step2[25];
output[stride*10] = step2[10] + step2[26];
output[stride*11] = step2[11] + step2[27];
output[stride*12] = step2[12] + step2[28];
output[stride*13] = step2[13] + step2[29];
output[stride*14] = step2[14] + step2[30];
output[stride*15] = step2[15] + step2[31];
output[stride*16] = step2[15] - step2[(31 - 0)];
output[stride*17] = step2[14] - step2[(31 - 1)];
output[stride*18] = step2[13] - step2[(31 - 2)];
output[stride*19] = step2[12] - step2[(31 - 3)];
output[stride*20] = step2[11] - step2[(31 - 4)];
output[stride*21] = step2[10] - step2[(31 - 5)];
output[stride*22] = step2[ 9] - step2[(31 - 6)];
output[stride*23] = step2[ 8] - step2[(31 - 7)];
output[stride*24] = step2[ 7] - step2[(31 - 8)];
output[stride*25] = step2[ 6] - step2[(31 - 9)];
output[stride*26] = step2[ 5] - step2[(31 - 10)];
output[stride*27] = step2[ 4] - step2[(31 - 11)];
output[stride*28] = step2[ 3] - step2[(31 - 12)];
output[stride*29] = step2[ 2] - step2[(31 - 13)];
output[stride*30] = step2[ 1] - step2[(31 - 14)];
output[stride*31] = step2[ 0] - step2[(31 - 15)];
}
static void butterfly_64_idct_1d(double *input, double *output, int stride) {
double step1[64], step2[64];
int i;
static const double C[64] = {
1.00000000000000000000, // cos(0 * pi / 128)
0.99969881869620424997, // cos(1 * pi / 128)
0.99879545620517240501, // cos(2 * pi / 128)
0.99729045667869020697, // cos(3 * pi / 128)
0.99518472667219692873, // cos(4 * pi / 128)
0.99247953459870996706, // cos(5 * pi / 128)
0.98917650996478101444, // cos(6 * pi / 128)
0.98527764238894122162, // cos(7 * pi / 128)
0.98078528040323043058, // cos(8 * pi / 128)
0.97570213003852857003, // cos(9 * pi / 128)
0.97003125319454397424, // cos(10 * pi / 128)
0.96377606579543984022, // cos(11 * pi / 128)
0.95694033573220882438, // cos(12 * pi / 128)
0.94952818059303667475, // cos(13 * pi / 128)
0.94154406518302080631, // cos(14 * pi / 128)
0.93299279883473895669, // cos(15 * pi / 128)
0.92387953251128673848, // cos(16 * pi / 128)
0.91420975570353069095, // cos(17 * pi / 128)
0.90398929312344333820, // cos(18 * pi / 128)
0.89322430119551532446, // cos(19 * pi / 128)
0.88192126434835504956, // cos(20 * pi / 128)
0.87008699110871146054, // cos(21 * pi / 128)
0.85772861000027211809, // cos(22 * pi / 128)
0.84485356524970711689, // cos(23 * pi / 128)
0.83146961230254523567, // cos(24 * pi / 128)
0.81758481315158371139, // cos(25 * pi / 128)
0.80320753148064494287, // cos(26 * pi / 128)
0.78834642762660633863, // cos(27 * pi / 128)
0.77301045336273699338, // cos(28 * pi / 128)
0.75720884650648456748, // cos(29 * pi / 128)
0.74095112535495921691, // cos(30 * pi / 128)
0.72424708295146700276, // cos(31 * pi / 128)
0.70710678118654757274, // cos(32 * pi / 128)
0.68954054473706694051, // cos(33 * pi / 128)
0.67155895484701844111, // cos(34 * pi / 128)
0.65317284295377686654, // cos(35 * pi / 128)
0.63439328416364559882, // cos(36 * pi / 128)
0.61523159058062693028, // cos(37 * pi / 128)
0.59569930449243346793, // cos(38 * pi / 128)
0.57580819141784544968, // cos(39 * pi / 128)
0.55557023301960228867, // cos(40 * pi / 128)
0.53499761988709737537, // cos(41 * pi / 128)
0.51410274419322177231, // cos(42 * pi / 128)
0.49289819222978414892, // cos(43 * pi / 128)
0.47139673682599780857, // cos(44 * pi / 128)
0.44961132965460659516, // cos(45 * pi / 128)
0.42755509343028219593, // cos(46 * pi / 128)
0.40524131400498980549, // cos(47 * pi / 128)
0.38268343236508983729, // cos(48 * pi / 128)
0.35989503653498827740, // cos(49 * pi / 128)
0.33688985339222005111, // cos(50 * pi / 128)
0.31368174039889151761, // cos(51 * pi / 128)
0.29028467725446227554, // cos(52 * pi / 128)
0.26671275747489842090, // cos(53 * pi / 128)
0.24298017990326398197, // cos(54 * pi / 128)
0.21910124015686976984, // cos(55 * pi / 128)
0.19509032201612830359, // cos(56 * pi / 128)
0.17096188876030135595, // cos(57 * pi / 128)
0.14673047445536174793, // cos(58 * pi / 128)
0.12241067519921627893, // cos(59 * pi / 128)
0.09801714032956077016, // cos(60 * pi / 128)
0.07356456359966745406, // cos(61 * pi / 128)
0.04906767432741813290, // cos(62 * pi / 128)
0.02454122852291226731, // cos(63 * pi / 128)
};
for (i = 0; i < 64; i += 2) {
step1[i / 2] = input[stride * i];
}
step1[32] = DownshiftMultiplyBy2(input[stride*1] * C[32]);
for (i = 3; i < 64; i+=2) {
step1[32 + i/2] = (input[stride * i] + input[stride * (i - 2)]);
}
butterfly_32_idct_1d(step1, step2, 1);
butterfly_32_idct_1d(step1 + 32, step2 + 32, 1);
for (i = 32; i < 64; ++i) {
step2[i] = DownshiftMultiply(step2[i] / (2 * C[(i - 32) * 2 + 1]));
}
for (i = 0; i < 32; ++i) {
output[stride * i] = step2[i] + step2[32 + i];
}
for (i = 0; i < 32; ++i) {
output[stride * (i + 32)] = step2[31 - i] - step2[63 - i];
}
}
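Both butterfly_* helpers apply the same standard radix-2 inverse-DCT recursion, each invoking the half-size transform twice (stated here as background, not as commit text): split the N coefficients by parity, inverse-transform each half at size N/2, and recombine as

  out[n]     = g[n] + h[n]
  out[N-1-n] = g[n] - h[n],   0 <= n < N/2,

where g[] is the half-size inverse transform of the even coefficients and h[n] = H[n] / (2 * cos((2n+1) * pi / (2N))), with H[] the half-size inverse transform of the folded odd coefficients X[2k+1] + X[2k-1]. The step2[i] / (2 * C[...]) divisions above are exactly that cosine correction, and the doubled first odd term (step1[32] in the 64-point case) handles the k = 0 boundary of the X[2k-1] shift, up to the helpers' internal scaling.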
void vp9_idct64x64_4096_add_c(const tran_low_t *input, uint8_t *dest,
int stride) {
// vp9_clear_system_state(); // Make it simd safe : __asm emms;
{
double out[64 * 64], out2[64 * 64];
int i, j;
// First transform rows
for (i = 0; i < 64; ++i) {
double temp_in[64], temp_out[64];
for (j = 0; j < 64; ++j)
temp_in[j] = input[j + i * 64];
butterfly_64_idct_1d(temp_in, temp_out, 1);
for (j = 0; j < 64; ++j)
out[j + i * 64] = temp_out[j];
}
// Then transform columns
for (i = 0; i < 64; ++i) {
double temp_in[64], temp_out[64];
for (j = 0; j < 64; ++j)
temp_in[j] = out[j * 64 + i];
butterfly_64_idct_1d(temp_in, temp_out, 1);
for (j = 0; j < 64; ++j)
out2[j * 64 + i] = temp_out[j];
}
for (j = 0; j < 64; ++j) {
for (i = 0; i < 64; ++i)
dest[i] = clip_pixel_add(dest[i], round(out2[j * 64 + i] / 128));
dest += stride;
}
}
// vp9_clear_system_state(); // Make it simd safe : __asm emms;
}
void vp9_idct64x64_add(const tran_low_t *input, uint8_t *dest,
int stride, int eob) {
(void) eob;
vp9_idct64x64_4096_add_c(input, dest, stride);
}
#endif
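Both 64x64 inverse transforms are plain double-precision reference implementations — the RTCD definitions later in this commit list no SIMD specializations for them — and the final round(out2[...] / 128) presumably undoes the gain of the paired vp9_fdct64x64 forward transform.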
#if CONFIG_VP9_HIGHBITDEPTH
void vp9_highbd_iwht4x4_16_add_c(const tran_low_t *input, uint8_t *dest8,
int stride, int bd) {
@ -2899,4 +3351,47 @@ void vp9_highbd_iht16x16_add(TX_TYPE tx_type, const tran_low_t *input,
vp9_highbd_iht16x16_256_add(input, dest, stride, tx_type, bd);
}
}
#if CONFIG_TX64X64
void vp9_highbd_idct64x64_4096_add_c(const tran_low_t *input, uint8_t *dest,
int stride, int bd) {
// vp9_clear_system_state(); // Make it simd safe : __asm emms;
{
double out[64 * 64], out2[64 * 64];
int i, j;
// First transform rows
for (i = 0; i < 64; ++i) {
double temp_in[64], temp_out[64];
for (j = 0; j < 64; ++j)
temp_in[j] = input[j + i * 64];
butterfly_64_idct_1d(temp_in, temp_out, 1);
for (j = 0; j < 64; ++j)
out[j + i * 64] = temp_out[j];
}
// Then transform columns
for (i = 0; i < 64; ++i) {
double temp_in[64], temp_out[64];
for (j = 0; j < 64; ++j)
temp_in[j] = out[j * 64 + i];
butterfly_64_idct_1d(temp_in, temp_out, 1);
for (j = 0; j < 64; ++j)
out2[j * 64 + i] = temp_out[j];
}
for (j = 0; j < 64; ++j) {
for (i = 0; i < 64; ++i)
dest[i] = highbd_clip_pixel_add(
dest[i], round(out2[j * 64 + i] / 128), bd);
dest += stride;
}
}
// vp9_clear_system_state(); // Make it simd safe : __asm emms;
}
void vp9_highbd_idct64x64_add(const tran_low_t *input, uint8_t *dest,
int stride, int eob, int bd) {
(void) eob;
vp9_highbd_idct64x64_4096_add_c(input, dest, stride, bd);
}
#endif // CONFIG_TX64X64
#endif // CONFIG_VP9_HIGHBITDEPTH


@ -122,11 +122,14 @@ void vp9_idct4x4_add(const tran_low_t *input, uint8_t *dest, int stride,
int eob);
void vp9_idct8x8_add(const tran_low_t *input, uint8_t *dest, int stride,
int eob);
void vp9_idct16x16_add(const tran_low_t *input, uint8_t *dest, int stride, int
eob);
void vp9_idct16x16_add(const tran_low_t *input, uint8_t *dest, int stride,
int eob);
void vp9_idct32x32_add(const tran_low_t *input, uint8_t *dest, int stride,
int eob);
#if CONFIG_TX64X64
void vp9_idct64x64_add(const tran_low_t *input, uint8_t *dest, int stride,
int eob);
#endif
void vp9_iht4x4_add(TX_TYPE tx_type, const tran_low_t *input, uint8_t *dest,
int stride, int eob);
void vp9_iht8x8_add(TX_TYPE tx_type, const tran_low_t *input, uint8_t *dest,
@ -145,6 +148,10 @@ void vp9_highbd_idct16x16_add(const tran_low_t *input, uint8_t *dest,
int stride, int eob, int bd);
void vp9_highbd_idct32x32_add(const tran_low_t *input, uint8_t *dest,
int stride, int eob, int bd);
#if CONFIG_TX64X64
void vp9_highbd_idct64x64_add(const tran_low_t *input, uint8_t *dest,
int stride, int eob, int bd);
#endif
void vp9_highbd_iht4x4_add(TX_TYPE tx_type, const tran_low_t *input,
uint8_t *dest, int stride, int eob, int bd);
void vp9_highbd_iht8x8_add(TX_TYPE tx_type, const tran_low_t *input,


@ -38,6 +38,9 @@ static const uint64_t left_64x64_txform_mask[TX_SIZES]= {
0xffffffffffffffff, // TX_8x8
0x5555555555555555, // TX_16x16
0x1111111111111111, // TX_32x32
#if CONFIG_TX64X64
0x0101010101010101, // TX_64x64
#endif
};
// 64 bit masks for above transform size. Each 1 represents a position where
@ -62,6 +65,9 @@ static const uint64_t above_64x64_txform_mask[TX_SIZES]= {
0xffffffffffffffff, // TX_8x8
0x00ff00ff00ff00ff, // TX_16x16
0x000000ff000000ff, // TX_32x32
#if CONFIG_TX64X64
0x00000000000000ff, // TX_64x64
#endif
};
// 64 bit masks for prediction sizes (left). Each 1 represents a position
@ -140,6 +146,9 @@ static const uint16_t left_64x64_txform_mask_uv[TX_SIZES]= {
0xffff, // TX_8x8
0x5555, // TX_16x16
0x1111, // TX_32x32
#if CONFIG_TX64X64
0x0101, // TX_64x64, never used
#endif
};
static const uint16_t above_64x64_txform_mask_uv[TX_SIZES]= {
@ -147,6 +156,9 @@ static const uint16_t above_64x64_txform_mask_uv[TX_SIZES]= {
0xffff, // TX_8x8
0x0f0f, // TX_16x16
0x000f, // TX_32x32
#if CONFIG_TX64X64
0x0003, // TX_64x64, never used
#endif
};
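Reading these by the file's existing convention (an inference from the neighboring tables, not commit text): the 64-bit luma masks carry one bit per 8x8 unit of a 64x64 superblock, 8 bits per row, so a 64x64 transform leaves only the block's outer edges to filter — bit 0 of each row in the left mask (0x0101010101010101) and the first row in the above mask (0x00000000000000ff). The 16-bit chroma masks cover the 4x4 grid of the subsampled plane and are marked never used because 4:2:0 chroma tops out at a 32x32 transform.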
// 16 bit left mask to shift and set for each uv prediction size.


@ -107,6 +107,10 @@ static INLINE const vp9_prob *get_tx_probs(TX_SIZE max_tx_size, int ctx,
return tx_probs->p16x16[ctx];
case TX_32X32:
return tx_probs->p32x32[ctx];
#if CONFIG_TX64X64
case TX_64X64:
return tx_probs->p64x64[ctx];
#endif
default:
assert(0 && "Invalid max_tx_size.");
return NULL;
@ -128,6 +132,10 @@ static INLINE unsigned int *get_tx_counts(TX_SIZE max_tx_size, int ctx,
return tx_counts->p16x16[ctx];
case TX_32X32:
return tx_counts->p32x32[ctx];
#if CONFIG_TX64X64
case TX_64X64:
return tx_counts->p64x64[ctx];
#endif
default:
assert(0 && "Invalid max_tx_size.");
return NULL;


@ -47,7 +47,34 @@ const TX_TYPE intra_mode_to_tx_type_lookup[INTRA_MODES] = {
const uint16_t *left, int bd) { \
highbd_##type##_predictor(dst, stride, size, above, left, bd); \
}
#endif // CONFIG_VP9_HIGHBITDEPTH
#if CONFIG_TX64X64
#if CONFIG_VP9_HIGHBITDEPTH
#define intra_pred_allsizes(type) \
intra_pred_sized(type, 4) \
intra_pred_sized(type, 8) \
intra_pred_sized(type, 16) \
intra_pred_sized(type, 32) \
intra_pred_sized(type, 64) \
intra_pred_highbd_sized(type, 4) \
intra_pred_highbd_sized(type, 8) \
intra_pred_highbd_sized(type, 16) \
intra_pred_highbd_sized(type, 32) \
intra_pred_highbd_sized(type, 64)
#else
#define intra_pred_allsizes(type) \
intra_pred_sized(type, 4) \
intra_pred_sized(type, 8) \
intra_pred_sized(type, 16) \
intra_pred_sized(type, 32) \
intra_pred_sized(type, 64)
#endif // CONFIG_VP9_HIGHBITDEPTH
#else // CONFIG_TX64X64
#if CONFIG_VP9_HIGHBITDEPTH
#define intra_pred_allsizes(type) \
intra_pred_sized(type, 4) \
intra_pred_sized(type, 8) \
@ -57,9 +84,7 @@ const TX_TYPE intra_mode_to_tx_type_lookup[INTRA_MODES] = {
intra_pred_highbd_sized(type, 8) \
intra_pred_highbd_sized(type, 16) \
intra_pred_highbd_sized(type, 32)
#else
#define intra_pred_allsizes(type) \
intra_pred_sized(type, 4) \
intra_pred_sized(type, 8) \
@ -67,6 +92,8 @@ const TX_TYPE intra_mode_to_tx_type_lookup[INTRA_MODES] = {
intra_pred_sized(type, 32)
#endif // CONFIG_VP9_HIGHBITDEPTH
#endif // CONFIG_TX64X64
#if CONFIG_VP9_HIGHBITDEPTH
static INLINE void highbd_d207_predictor(uint16_t *dst, ptrdiff_t stride,
int bs, const uint16_t *above,
@ -575,16 +602,25 @@ static intra_pred_fn dc_pred[2][2][TX_SIZES];
typedef void (*intra_high_pred_fn)(uint16_t *dst, ptrdiff_t stride,
const uint16_t *above, const uint16_t *left,
int bd);
static intra_high_pred_fn pred_high[INTRA_MODES][4];
static intra_high_pred_fn dc_pred_high[2][2][4];
static intra_high_pred_fn pred_high[INTRA_MODES][TX_SIZES];
static intra_high_pred_fn dc_pred_high[2][2][TX_SIZES];
#endif // CONFIG_VP9_HIGHBITDEPTH
void vp9_init_intra_predictors() {
#if CONFIG_TX64X64
#define INIT_ALL_SIZES(p, type) \
p[TX_4X4] = vp9_##type##_predictor_4x4; \
p[TX_8X8] = vp9_##type##_predictor_8x8; \
p[TX_16X16] = vp9_##type##_predictor_16x16; \
p[TX_32X32] = vp9_##type##_predictor_32x32; \
p[TX_64X64] = vp9_##type##_predictor_64x64
#else
#define INIT_ALL_SIZES(p, type) \
p[TX_4X4] = vp9_##type##_predictor_4x4; \
p[TX_8X8] = vp9_##type##_predictor_8x8; \
p[TX_16X16] = vp9_##type##_predictor_16x16; \
p[TX_32X32] = vp9_##type##_predictor_32x32
#endif
INIT_ALL_SIZES(pred[V_PRED], v);
INIT_ALL_SIZES(pred[H_PRED], h);
@ -638,7 +674,11 @@ static void build_intra_predictors_high(const MACROBLOCKD *xd,
uint16_t *dst = CONVERT_TO_SHORTPTR(dst8);
uint16_t *ref = CONVERT_TO_SHORTPTR(ref8);
DECLARE_ALIGNED_ARRAY(16, uint16_t, left_col, 64);
#if CONFIG_TX64X64
DECLARE_ALIGNED_ARRAY(16, uint16_t, above_data, 256 + 16);
#else
DECLARE_ALIGNED_ARRAY(16, uint16_t, above_data, 128 + 16);
#endif
uint16_t *above_row = above_data + 16;
const uint16_t *const_above_row = above_row;
const int bs = 4 << tx_size;
@ -767,7 +807,11 @@ static void build_intra_predictors(const MACROBLOCKD *xd, const uint8_t *ref,
int plane) {
int i;
DECLARE_ALIGNED_ARRAY(16, uint8_t, left_col, 64);
#if CONFIG_TX64X64
DECLARE_ALIGNED_ARRAY(16, uint8_t, above_data, 256 + 16);
#else
DECLARE_ALIGNED_ARRAY(16, uint8_t, above_data, 128 + 16);
#endif
uint8_t *above_row = above_data + 16;
const uint8_t *const_above_row = above_row;
const int bs = 4 << tx_size;
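Two details worth noting: the existing bs = 4 << tx_size already yields 64 for TX_64X64 (4 << 4), so only the scratch buffers change, and above_data doubles (128 + 16 to 256 + 16 entries) to keep the same headroom for the above row and its above-right extension on 64-pixel-wide blocks.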


@ -224,6 +224,47 @@ specialize qw/vp9_dc_left_predictor_32x32/;
add_proto qw/void vp9_dc_128_predictor_32x32/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
specialize qw/vp9_dc_128_predictor_32x32/;
if (vpx_config("CONFIG_TX64X64") eq "yes") {
add_proto qw/void vp9_d207_predictor_64x64/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
specialize qw/vp9_d207_predictor_64x64/;
add_proto qw/void vp9_d45_predictor_64x64/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
specialize qw/vp9_d45_predictor_64x64/;
add_proto qw/void vp9_d63_predictor_64x64/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
specialize qw/vp9_d63_predictor_64x64/;
add_proto qw/void vp9_h_predictor_64x64/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
specialize qw/vp9_h_predictor_64x64/;
add_proto qw/void vp9_d117_predictor_64x64/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
specialize qw/vp9_d117_predictor_64x64/;
add_proto qw/void vp9_d135_predictor_64x64/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
specialize qw/vp9_d135_predictor_64x64/;
add_proto qw/void vp9_d153_predictor_64x64/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
specialize qw/vp9_d153_predictor_64x64/;
add_proto qw/void vp9_v_predictor_64x64/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
specialize qw/vp9_v_predictor_64x64/;
add_proto qw/void vp9_tm_predictor_64x64/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
specialize qw/vp9_tm_predictor_64x64/;
add_proto qw/void vp9_dc_predictor_64x64/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
specialize qw/vp9_dc_predictor_64x64/;
add_proto qw/void vp9_dc_top_predictor_64x64/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
specialize qw/vp9_dc_top_predictor_64x64/;
add_proto qw/void vp9_dc_left_predictor_64x64/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
specialize qw/vp9_dc_left_predictor_64x64/;
add_proto qw/void vp9_dc_128_predictor_64x64/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
specialize qw/vp9_dc_128_predictor_64x64/;
}
#
# Loopfilter
#
@ -366,6 +407,11 @@ if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") {
add_proto qw/void vp9_idct32x32_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
specialize qw/vp9_idct32x32_1_add/;
if (vpx_config("CONFIG_TX64X64") eq "yes") {
add_proto qw/void vp9_idct64x64_4096_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
specialize qw/vp9_idct64x64_4096_add/;
}
add_proto qw/void vp9_iht4x4_16_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int tx_type";
specialize qw/vp9_iht4x4_16_add/;
@ -419,6 +465,11 @@ if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") {
add_proto qw/void vp9_idct32x32_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
specialize qw/vp9_idct32x32_1_add/;
if (vpx_config("CONFIG_TX64X64") eq "yes") {
add_proto qw/void vp9_idct64x64_4096_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
specialize qw/vp9_idct64x64_4096_add/;
}
add_proto qw/void vp9_iht4x4_16_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int tx_type";
specialize qw/vp9_iht4x4_16_add/;
@ -480,6 +531,11 @@ if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") {
specialize qw/vp9_idct32x32_1_add sse2 neon_asm dspr2/;
$vp9_idct32x32_1_add_neon_asm=vp9_idct32x32_1_add_neon;
if (vpx_config("CONFIG_TX64X64") eq "yes") {
add_proto qw/void vp9_idct64x64_4096_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
specialize qw/vp9_idct64x64_4096_add/;
}
add_proto qw/void vp9_iht4x4_16_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int tx_type";
specialize qw/vp9_iht4x4_16_add sse2 neon_asm dspr2/;
$vp9_iht4x4_16_add_neon_asm=vp9_iht4x4_16_add_neon;
@ -662,6 +718,46 @@ if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") {
add_proto qw/void vp9_highbd_dc_128_predictor_32x32/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
specialize qw/vp9_highbd_dc_128_predictor_32x32/;
if (vpx_config("CONFIG_TX64X64") eq "yes") {
add_proto qw/void vp9_highbd_d207_predictor_64x64/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
specialize qw/vp9_highbd_d207_predictor_64x64/;
add_proto qw/void vp9_highbd_d45_predictor_64x64/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
specialize qw/vp9_highbd_d45_predictor_64x64/;
add_proto qw/void vp9_highbd_d63_predictor_64x64/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
specialize qw/vp9_highbd_d63_predictor_64x64/;
add_proto qw/void vp9_highbd_h_predictor_64x64/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
specialize qw/vp9_highbd_h_predictor_64x64/;
add_proto qw/void vp9_highbd_d117_predictor_64x64/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
specialize qw/vp9_highbd_d117_predictor_64x64/;
add_proto qw/void vp9_highbd_d135_predictor_64x64/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
specialize qw/vp9_highbd_d135_predictor_64x64/;
add_proto qw/void vp9_highbd_d153_predictor_64x64/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
specialize qw/vp9_highbd_d153_predictor_64x64/;
add_proto qw/void vp9_highbd_v_predictor_64x64/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
specialize qw/vp9_highbd_v_predictor_64x64/;
add_proto qw/void vp9_highbd_tm_predictor_64x64/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
specialize qw/vp9_highbd_tm_predictor_64x64/;
add_proto qw/void vp9_highbd_dc_predictor_64x64/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
specialize qw/vp9_highbd_dc_predictor_64x64/;
add_proto qw/void vp9_highbd_dc_top_predictor_64x64/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
specialize qw/vp9_highbd_dc_top_predictor_64x64/;
add_proto qw/void vp9_highbd_dc_left_predictor_64x64/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
specialize qw/vp9_highbd_dc_left_predictor_64x64/;
add_proto qw/void vp9_highbd_dc_128_predictor_64x64/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
specialize qw/vp9_highbd_dc_128_predictor_64x64/;
}
#
# Sub Pixel Filters
#
@ -774,6 +870,11 @@ if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") {
add_proto qw/void vp9_highbd_idct32x32_1024_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int bd";
specialize qw/vp9_highbd_idct32x32_1024_add/;
if (vpx_config("CONFIG_TX64X64") eq "yes") {
add_proto qw/void vp9_highbd_idct64x64_4096_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int bd";
specialize qw/vp9_highbd_idct64x64_4096_add/;
}
add_proto qw/void vp9_highbd_idct32x32_34_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int bd";
specialize qw/vp9_highbd_idct32x32_34_add/;
@ -1149,6 +1250,14 @@ if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") {
add_proto qw/void vp9_quantize_b_32x32/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, int zbin_oq_value, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan";
specialize qw/vp9_quantize_b_32x32/;
if (vpx_config("CONFIG_TX64X64") eq "yes") {
add_proto qw/void vp9_quantize_fp_64x64/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, int zbin_oq_value, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan";
specialize qw/vp9_quantize_fp_64x64/;
add_proto qw/void vp9_quantize_b_64x64/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, int zbin_oq_value, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan";
specialize qw/vp9_quantize_b_64x64/;
}
} else {
add_proto qw/int64_t vp9_block_error/, "const tran_low_t *coeff, const tran_low_t *dqcoeff, intptr_t block_size, int64_t *ssz";
specialize qw/vp9_block_error avx2/, "$sse2_x86inc";
@ -1164,6 +1273,14 @@ if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") {
add_proto qw/void vp9_quantize_b_32x32/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, int zbin_oq_value, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan";
specialize qw/vp9_quantize_b_32x32/, "$ssse3_x86_64";
if (vpx_config("CONFIG_TX64X64") eq "yes") {
add_proto qw/void vp9_quantize_fp_64x64/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, int zbin_oq_value, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan";
specialize qw/vp9_quantize_fp_64x64/;
add_proto qw/void vp9_quantize_b_64x64/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, int zbin_oq_value, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan";
specialize qw/vp9_quantize_b_64x64/;
}
}
#
@ -1218,6 +1335,14 @@ if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") {
add_proto qw/void vp9_fdct32x32_rd/, "const int16_t *input, tran_low_t *output, int stride";
specialize qw/vp9_fdct32x32_rd/;
if (vpx_config("CONFIG_TX64X64") eq "yes") {
add_proto qw/void vp9_fdct64x64_1/, "const int16_t *input, tran_low_t *output, int stride";
specialize qw/vp9_fdct64x64_1/;
add_proto qw/void vp9_fdct64x64/, "const int16_t *input, tran_low_t *output, int stride";
specialize qw/vp9_fdct64x64/;
}
} else {
add_proto qw/void vp9_fht4x4/, "const int16_t *input, tran_low_t *output, int stride, int tx_type";
specialize qw/vp9_fht4x4 sse2/;
@ -1257,6 +1382,14 @@ if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") {
add_proto qw/void vp9_fdct32x32_rd/, "const int16_t *input, tran_low_t *output, int stride";
specialize qw/vp9_fdct32x32_rd sse2 avx2/;
if (vpx_config("CONFIG_TX64X64") eq "yes") {
add_proto qw/void vp9_fdct64x64_1/, "const int16_t *input, tran_low_t *output, int stride";
specialize qw/vp9_fdct64x64_1/;
add_proto qw/void vp9_fdct64x64/, "const int16_t *input, tran_low_t *output, int stride";
specialize qw/vp9_fdct64x64/;
}
}
#
@ -1873,6 +2006,14 @@ if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") {
add_proto qw/void vp9_highbd_quantize_b_32x32/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, int zbin_oq_value, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan";
specialize qw/vp9_highbd_quantize_b_32x32/;
if (vpx_config("CONFIG_TX64X64") eq "yes") {
add_proto qw/void vp9_highbd_quantize_fp_64x64/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, int zbin_oq_value, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan";
specialize qw/vp9_highbd_quantize_fp_64x64/;
add_proto qw/void vp9_highbd_quantize_b_64x64/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, int zbin_oq_value, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan";
specialize qw/vp9_highbd_quantize_b_64x64/;
}
#
# Structured Similarity (SSIM)
#
@ -1918,6 +2059,14 @@ if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") {
add_proto qw/void vp9_highbd_fdct32x32_rd/, "const int16_t *input, tran_low_t *output, int stride";
specialize qw/vp9_highbd_fdct32x32_rd/;
if (vpx_config("CONFIG_TX64X64") eq "yes") {
add_proto qw/void vp9_highbd_fdct64x64_1/, "const int16_t *input, tran_low_t *output, int stride";
specialize qw/vp9_highbd_fdct64x64_1/;
add_proto qw/void vp9_highbd_fdct64x64/, "const int16_t *input, tran_low_t *output, int stride";
specialize qw/vp9_highbd_fdct64x64/;
}
add_proto qw/void vp9_highbd_temporal_filter_apply/, "uint8_t *frame1, unsigned int stride, uint8_t *frame2, unsigned int block_width, unsigned int block_height, int strength, int filter_weight, unsigned int *accumulator, uint16_t *count";
specialize qw/vp9_highbd_temporal_filter_apply/;

File diff suppressed because it is too large

View File

@ -80,8 +80,15 @@ static int decode_unsigned_max(struct vp9_read_bit_buffer *rb, int max) {
static TX_MODE read_tx_mode(vp9_reader *r) {
TX_MODE tx_mode = vp9_read_literal(r, 2);
#if CONFIG_TX64X64
if (tx_mode == 2)
tx_mode += vp9_read_bit(r);  // ALLOW_16X16 or ALLOW_32X32
else if (tx_mode == 3)
tx_mode += 1 + vp9_read_bit(r);  // ALLOW_64X64 or TX_MODE_SELECT
#else
if (tx_mode == ALLOW_32X32)
tx_mode += vp9_read_bit(r);
#endif
return tx_mode;
}
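With CONFIG_TX64X64 the two base bits plus the conditional extension bit map onto the six-entry TX_MODE enum as follows (a sketch assuming the nextgen enum order ONLY_4X4, ALLOW_8X8, ALLOW_16X16, ALLOW_32X32, ALLOW_64X64, TX_MODE_SELECT):

  literal (2 bits) | extra bit | decoded tx_mode
  0                | -         | ONLY_4X4
  1                | -         | ALLOW_8X8
  2                | 0 / 1     | ALLOW_16X16 / ALLOW_32X32
  3                | 0 / 1     | ALLOW_64X64 / TX_MODE_SELECT

encode_txfm_probs later in this commit writes exactly the mirror of this mapping.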
@ -89,16 +96,22 @@ static void read_tx_mode_probs(struct tx_probs *tx_probs, vp9_reader *r) {
int i, j;
for (i = 0; i < TX_SIZE_CONTEXTS; ++i)
for (j = 0; j < TX_SIZES - 3; ++j)
for (j = 0; j < 1; ++j)
vp9_diff_update_prob(r, &tx_probs->p8x8[i][j]);
for (i = 0; i < TX_SIZE_CONTEXTS; ++i)
for (j = 0; j < TX_SIZES - 2; ++j)
for (j = 0; j < 2; ++j)
vp9_diff_update_prob(r, &tx_probs->p16x16[i][j]);
for (i = 0; i < TX_SIZE_CONTEXTS; ++i)
for (j = 0; j < TX_SIZES - 1; ++j)
for (j = 0; j < 3; ++j)
vp9_diff_update_prob(r, &tx_probs->p32x32[i][j]);
#if CONFIG_TX64X64
for (i = 0; i < TX_SIZE_CONTEXTS; ++i)
for (j = 0; j < 4; ++j)
vp9_diff_update_prob(r, &tx_probs->p64x64[i][j]);
#endif
}
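The loop bounds become literals because CONFIG_TX64X64 grows TX_SIZES to 5, which would silently change the old TX_SIZES - 3/2/1 expressions. The counts themselves follow from the unary tx-size tree: a context offering k candidate sizes needs k - 1 probabilities, hence 1, 2, 3 and 4 entries for the p8x8, p16x16, p32x32 and p64x64 sets.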
static void read_switchable_interp_probs(FRAME_CONTEXT *fc, vp9_reader *r) {
@ -220,6 +233,12 @@ static void inverse_transform_block(MACROBLOCKD* xd, int plane, int block,
tx_type = DCT_DCT;
vp9_highbd_idct32x32_add(dqcoeff, dst, stride, eob, xd->bd);
break;
#if CONFIG_TX64X64
case TX_64X64:
tx_type = DCT_DCT;
vp9_highbd_idct64x64_add(dqcoeff, dst, stride, eob, xd->bd);
break;
#endif
default:
assert(0 && "Invalid transform size");
}
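As with TX_32X32, only the 2-D DCT is defined for the new size, so tx_type is forced to DCT_DCT before the 64x64 inverse is applied; the same pattern repeats in the remaining branches of inverse_transform_block below.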
@ -247,6 +266,12 @@ static void inverse_transform_block(MACROBLOCKD* xd, int plane, int block,
tx_type = DCT_DCT;
vp9_idct32x32_add(dqcoeff, dst, stride, eob);
break;
#if CONFIG_TX64X64
case TX_64X64:
tx_type = DCT_DCT;
vp9_idct64x64_add(dqcoeff, dst, stride, eob);
break;
#endif
default:
assert(0 && "Invalid transform size");
return;
@ -276,6 +301,12 @@ static void inverse_transform_block(MACROBLOCKD* xd, int plane, int block,
tx_type = DCT_DCT;
vp9_idct32x32_add(dqcoeff, dst, stride, eob);
break;
#if CONFIG_TX64X64
case TX_64X64:
tx_type = DCT_DCT;
vp9_idct64x64_add(dqcoeff, dst, stride, eob);
break;
#endif
default:
assert(0 && "Invalid transform size");
return;
@ -321,7 +352,6 @@ static void predict_and_reconstruct_intra_block(int plane, int block,
b_width_log2_lookup[plane_bsize], tx_size, mode,
dst, pd->dst.stride, dst, pd->dst.stride,
x, y, plane);
if (!mi->mbmi.skip) {
const int eob = vp9_decode_block_tokens(cm, xd, plane, block,
plane_bsize, x, y, tx_size,
@ -701,14 +731,14 @@ static void setup_frame_size(VP9_COMMON *cm, struct vp9_read_bit_buffer *rb) {
setup_display_size(cm, rb);
if (vp9_realloc_frame_buffer(
get_frame_new_buffer(cm), cm->width, cm->height,
cm->subsampling_x, cm->subsampling_y,
get_frame_new_buffer(cm), cm->width, cm->height,
cm->subsampling_x, cm->subsampling_y,
#if CONFIG_VP9_HIGHBITDEPTH
cm->use_highbitdepth,
cm->use_highbitdepth,
#endif
VP9_DEC_BORDER_IN_PIXELS,
&cm->frame_bufs[cm->new_fb_idx].raw_frame_buffer, cm->get_fb_cb,
cm->cb_priv)) {
VP9_DEC_BORDER_IN_PIXELS,
&cm->frame_bufs[cm->new_fb_idx].raw_frame_buffer, cm->get_fb_cb,
cm->cb_priv)) {
vpx_internal_error(&cm->error, VPX_CODEC_MEM_ERROR,
"Failed to allocate frame buffer");
}
@ -779,14 +809,14 @@ static void setup_frame_size_with_refs(VP9_COMMON *cm,
setup_display_size(cm, rb);
if (vp9_realloc_frame_buffer(
get_frame_new_buffer(cm), cm->width, cm->height,
cm->subsampling_x, cm->subsampling_y,
get_frame_new_buffer(cm), cm->width, cm->height,
cm->subsampling_x, cm->subsampling_y,
#if CONFIG_VP9_HIGHBITDEPTH
cm->use_highbitdepth,
cm->use_highbitdepth,
#endif
VP9_DEC_BORDER_IN_PIXELS,
&cm->frame_bufs[cm->new_fb_idx].raw_frame_buffer, cm->get_fb_cb,
cm->cb_priv)) {
VP9_DEC_BORDER_IN_PIXELS,
&cm->frame_bufs[cm->new_fb_idx].raw_frame_buffer, cm->get_fb_cb,
cm->cb_priv)) {
vpx_internal_error(&cm->error, VPX_CODEC_MEM_ERROR,
"Failed to allocate frame buffer");
}

View File

@ -65,8 +65,14 @@ static TX_SIZE read_selected_tx_size(VP9_COMMON *cm, MACROBLOCKD *xd,
int tx_size = vp9_read(r, tx_probs[0]);
if (tx_size != TX_4X4 && max_tx_size >= TX_16X16) {
tx_size += vp9_read(r, tx_probs[1]);
if (tx_size != TX_8X8 && max_tx_size >= TX_32X32)
if (tx_size != TX_8X8 && max_tx_size >= TX_32X32) {
tx_size += vp9_read(r, tx_probs[2]);
#if CONFIG_TX64X64
if (tx_size != TX_16X16 && max_tx_size >= TX_64X64) {
tx_size += vp9_read(r, tx_probs[3]);
}
#endif
}
}
if (!cm->frame_parallel_decoding_mode)
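The new branch keeps the unary structure intact: each successful vp9_read promotes tx_size one step, so TX_64X64 costs four bins when max_tx_size permits it, while bitstreams produced without CONFIG_TX64X64 decode exactly as before.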

View File

@ -32,7 +32,7 @@
#define INCREMENT_COUNT(token) \
do { \
if (!cm->frame_parallel_decoding_mode) \
++coef_counts[band][ctx][token]; \
++coef_counts[band][ctx][token]; \
} while (0)
static INLINE int read_coeff(const vp9_prob *probs, int n, vp9_reader *r) {
@ -69,9 +69,9 @@ static int decode_coefs(VP9_COMMON *cm, const MACROBLOCKD *xd, PLANE_TYPE type,
counts->coef[tx_size][type][ref];
unsigned int (*eob_branch_count)[COEFF_CONTEXTS] =
counts->eob_branch[tx_size][type][ref];
uint8_t token_cache[32 * 32];
uint8_t token_cache[MAX_NUM_COEFS];
const uint8_t *band_translate = get_band_translate(tx_size);
const int dq_shift = (tx_size == TX_32X32);
const int dq_shift = (tx_size > TX_16X16) ? tx_size - TX_16X16 : 0;
int v, token;
int16_t dqv = dq[0];
const uint8_t *cat1_prob;
@ -214,6 +214,9 @@ int vp9_decode_block_tokens(VP9_COMMON *cm, MACROBLOCKD *xd,
const int eob = decode_coefs(cm, xd, pd->plane_type,
BLOCK_OFFSET(pd->dqcoeff, block), tx_size,
pd->dequant, ctx, so->scan, so->neighbors, r);
#if CONFIG_TX64X64
if (plane > 0) assert(tx_size != TX_64X64);
#endif
vp9_set_contexts(xd, pd, plane_bsize, tx_size, eob > 0, x, y);
return eob;
}
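The generalized dq_shift keeps the decoder in step with the encoder-side coefficient scaling for both large transforms; substituting the enum values (a quick check, assuming the usual TX_SIZE ordering):

  dq_shift(TX_4X4 .. TX_16X16) == 0
  dq_shift(TX_32X32) == TX_32X32 - TX_16X16 == 1
  dq_shift(TX_64X64) == TX_64X64 - TX_16X16 == 2

That is, 32x32 coefficients are stored at half magnitude and 64x64 at a quarter, matching the / (2 << logsizeby32) divisions in the quantizers later in this commit.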

View File

@ -88,8 +88,13 @@ static void write_selected_tx_size(const VP9_COMMON *cm,
vp9_write(w, tx_size != TX_4X4, tx_probs[0]);
if (tx_size != TX_4X4 && max_tx_size >= TX_16X16) {
vp9_write(w, tx_size != TX_8X8, tx_probs[1]);
if (tx_size != TX_8X8 && max_tx_size >= TX_32X32)
if (tx_size != TX_8X8 && max_tx_size >= TX_32X32) {
vp9_write(w, tx_size != TX_16X16, tx_probs[2]);
#if CONFIG_TX64X64
if (tx_size != TX_16X16 && max_tx_size >= TX_64X64)
vp9_write(w, tx_size != TX_32X32, tx_probs[3]);
#endif
}
}
}
@ -684,7 +689,7 @@ static void update_coef_probs(VP9_COMP *cpi, vp9_writer* w) {
vp9_coeff_stats frame_branch_ct[TX_SIZES][PLANE_TYPES];
vp9_coeff_probs_model frame_coef_probs[TX_SIZES][PLANE_TYPES];
for (tx_size = TX_4X4; tx_size <= TX_32X32; ++tx_size)
for (tx_size = TX_4X4; tx_size <= max_tx_size; ++tx_size)
build_tree_distribution(cpi, tx_size, frame_branch_ct[tx_size],
frame_coef_probs[tx_size]);
@ -815,37 +820,60 @@ static void encode_segmentation(VP9_COMMON *cm, MACROBLOCKD *xd,
static void encode_txfm_probs(VP9_COMMON *cm, vp9_writer *w) {
// Mode
#if CONFIG_TX64X64
if (cm->tx_mode == ALLOW_16X16 || cm->tx_mode == ALLOW_32X32) {
vp9_write_literal(w, 2, 2);
vp9_write_bit(w, cm->tx_mode == ALLOW_32X32);
} else if (cm->tx_mode == ALLOW_64X64 || cm->tx_mode == TX_MODE_SELECT) {
vp9_write_literal(w, 3, 2);
vp9_write_bit(w, cm->tx_mode == TX_MODE_SELECT);
} else {
vp9_write_literal(w, cm->tx_mode, 2);
}
#else
vp9_write_literal(w, MIN(cm->tx_mode, ALLOW_32X32), 2);
if (cm->tx_mode >= ALLOW_32X32)
vp9_write_bit(w, cm->tx_mode == TX_MODE_SELECT);
#endif // CONFIG_TX64X64
// Probabilities
if (cm->tx_mode == TX_MODE_SELECT) {
int i, j;
unsigned int ct_8x8p[TX_SIZES - 3][2];
unsigned int ct_16x16p[TX_SIZES - 2][2];
unsigned int ct_32x32p[TX_SIZES - 1][2];
unsigned int ct_8x8p[1][2];
unsigned int ct_16x16p[2][2];
unsigned int ct_32x32p[3][2];
#if CONFIG_TX64X64
unsigned int ct_64x64p[4][2];
#endif
for (i = 0; i < TX_SIZE_CONTEXTS; i++) {
tx_counts_to_branch_counts_8x8(cm->counts.tx.p8x8[i], ct_8x8p);
for (j = 0; j < TX_SIZES - 3; j++)
for (j = 0; j < 1; j++)
vp9_cond_prob_diff_update(w, &cm->fc.tx_probs.p8x8[i][j], ct_8x8p[j]);
}
for (i = 0; i < TX_SIZE_CONTEXTS; i++) {
tx_counts_to_branch_counts_16x16(cm->counts.tx.p16x16[i], ct_16x16p);
for (j = 0; j < TX_SIZES - 2; j++)
for (j = 0; j < 2; j++)
vp9_cond_prob_diff_update(w, &cm->fc.tx_probs.p16x16[i][j],
ct_16x16p[j]);
}
for (i = 0; i < TX_SIZE_CONTEXTS; i++) {
tx_counts_to_branch_counts_32x32(cm->counts.tx.p32x32[i], ct_32x32p);
for (j = 0; j < TX_SIZES - 1; j++)
for (j = 0; j < 3; j++)
vp9_cond_prob_diff_update(w, &cm->fc.tx_probs.p32x32[i][j],
ct_32x32p[j]);
}
#if CONFIG_TX64X64
for (i = 0; i < TX_SIZE_CONTEXTS; i++) {
tx_counts_to_branch_counts_64x64(cm->counts.tx.p64x64[i], ct_64x64p);
for (j = 0; j < 4; j++)
vp9_cond_prob_diff_update(w, &cm->fc.tx_probs.p64x64[i][j],
ct_64x64p[j]);
}
#endif // CONFIG_TX64X64
}
}

View File

@ -1439,6 +1439,458 @@ void vp9_fdct32x32_rd_c(const int16_t *input, tran_low_t *out, int stride) {
}
}
#if CONFIG_TX64X64
// TODO(debargha): Using a floating-point implementation for now.
// Should reuse the 32x32 integer DCT we already have.
static void dct32_1d(double *input, double *output, int stride) {
static const double C1 = 0.998795456205; // cos(pi * 1 / 64)
static const double C2 = 0.995184726672; // cos(pi * 2 / 64)
static const double C3 = 0.989176509965; // cos(pi * 3 / 64)
static const double C4 = 0.980785280403; // cos(pi * 4 / 64)
static const double C5 = 0.970031253195; // cos(pi * 5 / 64)
static const double C6 = 0.956940335732; // cos(pi * 6 / 64)
static const double C7 = 0.941544065183; // cos(pi * 7 / 64)
static const double C8 = 0.923879532511; // cos(pi * 8 / 64)
static const double C9 = 0.903989293123; // cos(pi * 9 / 64)
static const double C10 = 0.881921264348; // cos(pi * 10 / 64)
static const double C11 = 0.857728610000; // cos(pi * 11 / 64)
static const double C12 = 0.831469612303; // cos(pi * 12 / 64)
static const double C13 = 0.803207531481; // cos(pi * 13 / 64)
static const double C14 = 0.773010453363; // cos(pi * 14 / 64)
static const double C15 = 0.740951125355; // cos(pi * 15 / 64)
static const double C16 = 0.707106781187; // cos(pi * 16 / 64)
static const double C17 = 0.671558954847; // cos(pi * 17 / 64)
static const double C18 = 0.634393284164; // cos(pi * 18 / 64)
static const double C19 = 0.595699304492; // cos(pi * 19 / 64)
static const double C20 = 0.555570233020; // cos(pi * 20 / 64)
static const double C21 = 0.514102744193; // cos(pi * 21 / 64)
static const double C22 = 0.471396736826; // cos(pi * 22 / 64)
static const double C23 = 0.427555093430; // cos(pi * 23 / 64)
static const double C24 = 0.382683432365; // cos(pi * 24 / 64)
static const double C25 = 0.336889853392; // cos(pi * 25 / 64)
static const double C26 = 0.290284677254; // cos(pi * 26 / 64)
static const double C27 = 0.242980179903; // cos(pi * 27 / 64)
static const double C28 = 0.195090322016; // cos(pi * 28 / 64)
static const double C29 = 0.146730474455; // cos(pi * 29 / 64)
static const double C30 = 0.098017140330; // cos(pi * 30 / 64)
static const double C31 = 0.049067674327; // cos(pi * 31 / 64)
double step[32];
// Stage 1
step[0] = input[stride*0] + input[stride*(32 - 1)];
step[1] = input[stride*1] + input[stride*(32 - 2)];
step[2] = input[stride*2] + input[stride*(32 - 3)];
step[3] = input[stride*3] + input[stride*(32 - 4)];
step[4] = input[stride*4] + input[stride*(32 - 5)];
step[5] = input[stride*5] + input[stride*(32 - 6)];
step[6] = input[stride*6] + input[stride*(32 - 7)];
step[7] = input[stride*7] + input[stride*(32 - 8)];
step[8] = input[stride*8] + input[stride*(32 - 9)];
step[9] = input[stride*9] + input[stride*(32 - 10)];
step[10] = input[stride*10] + input[stride*(32 - 11)];
step[11] = input[stride*11] + input[stride*(32 - 12)];
step[12] = input[stride*12] + input[stride*(32 - 13)];
step[13] = input[stride*13] + input[stride*(32 - 14)];
step[14] = input[stride*14] + input[stride*(32 - 15)];
step[15] = input[stride*15] + input[stride*(32 - 16)];
step[16] = -input[stride*16] + input[stride*(32 - 17)];
step[17] = -input[stride*17] + input[stride*(32 - 18)];
step[18] = -input[stride*18] + input[stride*(32 - 19)];
step[19] = -input[stride*19] + input[stride*(32 - 20)];
step[20] = -input[stride*20] + input[stride*(32 - 21)];
step[21] = -input[stride*21] + input[stride*(32 - 22)];
step[22] = -input[stride*22] + input[stride*(32 - 23)];
step[23] = -input[stride*23] + input[stride*(32 - 24)];
step[24] = -input[stride*24] + input[stride*(32 - 25)];
step[25] = -input[stride*25] + input[stride*(32 - 26)];
step[26] = -input[stride*26] + input[stride*(32 - 27)];
step[27] = -input[stride*27] + input[stride*(32 - 28)];
step[28] = -input[stride*28] + input[stride*(32 - 29)];
step[29] = -input[stride*29] + input[stride*(32 - 30)];
step[30] = -input[stride*30] + input[stride*(32 - 31)];
step[31] = -input[stride*31] + input[stride*(32 - 32)];
// Stage 2
output[stride*0] = step[0] + step[16 - 1];
output[stride*1] = step[1] + step[16 - 2];
output[stride*2] = step[2] + step[16 - 3];
output[stride*3] = step[3] + step[16 - 4];
output[stride*4] = step[4] + step[16 - 5];
output[stride*5] = step[5] + step[16 - 6];
output[stride*6] = step[6] + step[16 - 7];
output[stride*7] = step[7] + step[16 - 8];
output[stride*8] = -step[8] + step[16 - 9];
output[stride*9] = -step[9] + step[16 - 10];
output[stride*10] = -step[10] + step[16 - 11];
output[stride*11] = -step[11] + step[16 - 12];
output[stride*12] = -step[12] + step[16 - 13];
output[stride*13] = -step[13] + step[16 - 14];
output[stride*14] = -step[14] + step[16 - 15];
output[stride*15] = -step[15] + step[16 - 16];
output[stride*16] = step[16];
output[stride*17] = step[17];
output[stride*18] = step[18];
output[stride*19] = step[19];
output[stride*20] = (-step[20] + step[27])*C16;
output[stride*21] = (-step[21] + step[26])*C16;
output[stride*22] = (-step[22] + step[25])*C16;
output[stride*23] = (-step[23] + step[24])*C16;
output[stride*24] = (step[24] + step[23])*C16;
output[stride*25] = (step[25] + step[22])*C16;
output[stride*26] = (step[26] + step[21])*C16;
output[stride*27] = (step[27] + step[20])*C16;
output[stride*28] = step[28];
output[stride*29] = step[29];
output[stride*30] = step[30];
output[stride*31] = step[31];
// Stage 3
step[0] = output[stride*0] + output[stride*(8 - 1)];
step[1] = output[stride*1] + output[stride*(8 - 2)];
step[2] = output[stride*2] + output[stride*(8 - 3)];
step[3] = output[stride*3] + output[stride*(8 - 4)];
step[4] = -output[stride*4] + output[stride*(8 - 5)];
step[5] = -output[stride*5] + output[stride*(8 - 6)];
step[6] = -output[stride*6] + output[stride*(8 - 7)];
step[7] = -output[stride*7] + output[stride*(8 - 8)];
step[8] = output[stride*8];
step[9] = output[stride*9];
step[10] = (-output[stride*10] + output[stride*13])*C16;
step[11] = (-output[stride*11] + output[stride*12])*C16;
step[12] = (output[stride*12] + output[stride*11])*C16;
step[13] = (output[stride*13] + output[stride*10])*C16;
step[14] = output[stride*14];
step[15] = output[stride*15];
step[16] = output[stride*16] + output[stride*23];
step[17] = output[stride*17] + output[stride*22];
step[18] = output[stride*18] + output[stride*21];
step[19] = output[stride*19] + output[stride*20];
step[20] = -output[stride*20] + output[stride*19];
step[21] = -output[stride*21] + output[stride*18];
step[22] = -output[stride*22] + output[stride*17];
step[23] = -output[stride*23] + output[stride*16];
step[24] = -output[stride*24] + output[stride*31];
step[25] = -output[stride*25] + output[stride*30];
step[26] = -output[stride*26] + output[stride*29];
step[27] = -output[stride*27] + output[stride*28];
step[28] = output[stride*28] + output[stride*27];
step[29] = output[stride*29] + output[stride*26];
step[30] = output[stride*30] + output[stride*25];
step[31] = output[stride*31] + output[stride*24];
// Stage 4
output[stride*0] = step[0] + step[3];
output[stride*1] = step[1] + step[2];
output[stride*2] = -step[2] + step[1];
output[stride*3] = -step[3] + step[0];
output[stride*4] = step[4];
output[stride*5] = (-step[5] + step[6])*C16;
output[stride*6] = (step[6] + step[5])*C16;
output[stride*7] = step[7];
output[stride*8] = step[8] + step[11];
output[stride*9] = step[9] + step[10];
output[stride*10] = -step[10] + step[9];
output[stride*11] = -step[11] + step[8];
output[stride*12] = -step[12] + step[15];
output[stride*13] = -step[13] + step[14];
output[stride*14] = step[14] + step[13];
output[stride*15] = step[15] + step[12];
output[stride*16] = step[16];
output[stride*17] = step[17];
output[stride*18] = step[18]*-C8 + step[29]*C24;
output[stride*19] = step[19]*-C8 + step[28]*C24;
output[stride*20] = step[20]*-C24 + step[27]*-C8;
output[stride*21] = step[21]*-C24 + step[26]*-C8;
output[stride*22] = step[22];
output[stride*23] = step[23];
output[stride*24] = step[24];
output[stride*25] = step[25];
output[stride*26] = step[26]*C24 + step[21]*-C8;
output[stride*27] = step[27]*C24 + step[20]*-C8;
output[stride*28] = step[28]*C8 + step[19]*C24;
output[stride*29] = step[29]*C8 + step[18]*C24;
output[stride*30] = step[30];
output[stride*31] = step[31];
// Stage 5
step[0] = (output[stride*0] + output[stride*1]) * C16;
step[1] = (-output[stride*1] + output[stride*0]) * C16;
step[2] = output[stride*2]*C24 + output[stride*3] * C8;
step[3] = output[stride*3]*C24 - output[stride*2] * C8;
step[4] = output[stride*4] + output[stride*5];
step[5] = -output[stride*5] + output[stride*4];
step[6] = -output[stride*6] + output[stride*7];
step[7] = output[stride*7] + output[stride*6];
step[8] = output[stride*8];
step[9] = output[stride*9]*-C8 + output[stride*14]*C24;
step[10] = output[stride*10]*-C24 + output[stride*13]*-C8;
step[11] = output[stride*11];
step[12] = output[stride*12];
step[13] = output[stride*13]*C24 + output[stride*10]*-C8;
step[14] = output[stride*14]*C8 + output[stride*9]*C24;
step[15] = output[stride*15];
step[16] = output[stride*16] + output[stride*19];
step[17] = output[stride*17] + output[stride*18];
step[18] = -output[stride*18] + output[stride*17];
step[19] = -output[stride*19] + output[stride*16];
step[20] = -output[stride*20] + output[stride*23];
step[21] = -output[stride*21] + output[stride*22];
step[22] = output[stride*22] + output[stride*21];
step[23] = output[stride*23] + output[stride*20];
step[24] = output[stride*24] + output[stride*27];
step[25] = output[stride*25] + output[stride*26];
step[26] = -output[stride*26] + output[stride*25];
step[27] = -output[stride*27] + output[stride*24];
step[28] = -output[stride*28] + output[stride*31];
step[29] = -output[stride*29] + output[stride*30];
step[30] = output[stride*30] + output[stride*29];
step[31] = output[stride*31] + output[stride*28];
// Stage 6
output[stride*0] = step[0];
output[stride*1] = step[1];
output[stride*2] = step[2];
output[stride*3] = step[3];
output[stride*4] = step[4]*C28 + step[7]*C4;
output[stride*5] = step[5]*C12 + step[6]*C20;
output[stride*6] = step[6]*C12 + step[5]*-C20;
output[stride*7] = step[7]*C28 + step[4]*-C4;
output[stride*8] = step[8] + step[9];
output[stride*9] = -step[9] + step[8];
output[stride*10] = -step[10] + step[11];
output[stride*11] = step[11] + step[10];
output[stride*12] = step[12] + step[13];
output[stride*13] = -step[13] + step[12];
output[stride*14] = -step[14] + step[15];
output[stride*15] = step[15] + step[14];
output[stride*16] = step[16];
output[stride*17] = step[17]*-C4 + step[30]*C28;
output[stride*18] = step[18]*-C28 + step[29]*-C4;
output[stride*19] = step[19];
output[stride*20] = step[20];
output[stride*21] = step[21]*-C20 + step[26]*C12;
output[stride*22] = step[22]*-C12 + step[25]*-C20;
output[stride*23] = step[23];
output[stride*24] = step[24];
output[stride*25] = step[25]*C12 + step[22]*-C20;
output[stride*26] = step[26]*C20 + step[21]*C12;
output[stride*27] = step[27];
output[stride*28] = step[28];
output[stride*29] = step[29]*C28 + step[18]*-C4;
output[stride*30] = step[30]*C4 + step[17]*C28;
output[stride*31] = step[31];
// Stage 7
step[0] = output[stride*0];
step[1] = output[stride*1];
step[2] = output[stride*2];
step[3] = output[stride*3];
step[4] = output[stride*4];
step[5] = output[stride*5];
step[6] = output[stride*6];
step[7] = output[stride*7];
step[8] = output[stride*8]*C30 + output[stride*15]*C2;
step[9] = output[stride*9]*C14 + output[stride*14]*C18;
step[10] = output[stride*10]*C22 + output[stride*13]*C10;
step[11] = output[stride*11]*C6 + output[stride*12]*C26;
step[12] = output[stride*12]*C6 + output[stride*11]*-C26;
step[13] = output[stride*13]*C22 + output[stride*10]*-C10;
step[14] = output[stride*14]*C14 + output[stride*9]*-C18;
step[15] = output[stride*15]*C30 + output[stride*8]*-C2;
step[16] = output[stride*16] + output[stride*17];
step[17] = -output[stride*17] + output[stride*16];
step[18] = -output[stride*18] + output[stride*19];
step[19] = output[stride*19] + output[stride*18];
step[20] = output[stride*20] + output[stride*21];
step[21] = -output[stride*21] + output[stride*20];
step[22] = -output[stride*22] + output[stride*23];
step[23] = output[stride*23] + output[stride*22];
step[24] = output[stride*24] + output[stride*25];
step[25] = -output[stride*25] + output[stride*24];
step[26] = -output[stride*26] + output[stride*27];
step[27] = output[stride*27] + output[stride*26];
step[28] = output[stride*28] + output[stride*29];
step[29] = -output[stride*29] + output[stride*28];
step[30] = -output[stride*30] + output[stride*31];
step[31] = output[stride*31] + output[stride*30];
// Final stage --- outputs indices are bit-reversed.
output[stride*0] = step[0];
output[stride*16] = step[1];
output[stride*8] = step[2];
output[stride*24] = step[3];
output[stride*4] = step[4];
output[stride*20] = step[5];
output[stride*12] = step[6];
output[stride*28] = step[7];
output[stride*2] = step[8];
output[stride*18] = step[9];
output[stride*10] = step[10];
output[stride*26] = step[11];
output[stride*6] = step[12];
output[stride*22] = step[13];
output[stride*14] = step[14];
output[stride*30] = step[15];
output[stride*1] = step[16]*C31 + step[31]*C1;
output[stride*17] = step[17]*C15 + step[30]*C17;
output[stride*9] = step[18]*C23 + step[29]*C9;
output[stride*25] = step[19]*C7 + step[28]*C25;
output[stride*5] = step[20]*C27 + step[27]*C5;
output[stride*21] = step[21]*C11 + step[26]*C21;
output[stride*13] = step[22]*C19 + step[25]*C13;
output[stride*29] = step[23]*C3 + step[24]*C29;
output[stride*3] = step[24]*C3 + step[23]*-C29;
output[stride*19] = step[25]*C19 + step[22]*-C13;
output[stride*11] = step[26]*C11 + step[21]*-C21;
output[stride*27] = step[27]*C27 + step[20]*-C5;
output[stride*7] = step[28]*C7 + step[19]*-C25;
output[stride*23] = step[29]*C23 + step[18]*-C9;
output[stride*15] = step[30]*C15 + step[17]*-C17;
output[stride*31] = step[31]*C31 + step[16]*-C1;
}
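dct32_1d above is a direct double-precision port of the 32-point DCT-II butterfly, with the final stage mapping the internally bit-reversed results back to natural frequency order; it exists only as a building block for dct64_1d below (see the sketch after that function).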
static void dct64_1d(double *input, double *output, int stride) {
double step1[64], step2[64];
int i;
static const double C[64] = {
1.00000000000000000000, // cos(0 * pi / 128)
0.99969881869620424997, // cos(1 * pi / 128)
0.99879545620517240501, // cos(2 * pi / 128)
0.99729045667869020697, // cos(3 * pi / 128)
0.99518472667219692873, // cos(4 * pi / 128)
0.99247953459870996706, // cos(5 * pi / 128)
0.98917650996478101444, // cos(6 * pi / 128)
0.98527764238894122162, // cos(7 * pi / 128)
0.98078528040323043058, // cos(8 * pi / 128)
0.97570213003852857003, // cos(9 * pi / 128)
0.97003125319454397424, // cos(10 * pi / 128)
0.96377606579543984022, // cos(11 * pi / 128)
0.95694033573220882438, // cos(12 * pi / 128)
0.94952818059303667475, // cos(13 * pi / 128)
0.94154406518302080631, // cos(14 * pi / 128)
0.93299279883473895669, // cos(15 * pi / 128)
0.92387953251128673848, // cos(16 * pi / 128)
0.91420975570353069095, // cos(17 * pi / 128)
0.90398929312344333820, // cos(18 * pi / 128)
0.89322430119551532446, // cos(19 * pi / 128)
0.88192126434835504956, // cos(20 * pi / 128)
0.87008699110871146054, // cos(21 * pi / 128)
0.85772861000027211809, // cos(22 * pi / 128)
0.84485356524970711689, // cos(23 * pi / 128)
0.83146961230254523567, // cos(24 * pi / 128)
0.81758481315158371139, // cos(25 * pi / 128)
0.80320753148064494287, // cos(26 * pi / 128)
0.78834642762660633863, // cos(27 * pi / 128)
0.77301045336273699338, // cos(28 * pi / 128)
0.75720884650648456748, // cos(29 * pi / 128)
0.74095112535495921691, // cos(30 * pi / 128)
0.72424708295146700276, // cos(31 * pi / 128)
0.70710678118654757274, // cos(32 * pi / 128)
0.68954054473706694051, // cos(33 * pi / 128)
0.67155895484701844111, // cos(34 * pi / 128)
0.65317284295377686654, // cos(35 * pi / 128)
0.63439328416364559882, // cos(36 * pi / 128)
0.61523159058062693028, // cos(37 * pi / 128)
0.59569930449243346793, // cos(38 * pi / 128)
0.57580819141784544968, // cos(39 * pi / 128)
0.55557023301960228867, // cos(40 * pi / 128)
0.53499761988709737537, // cos(41 * pi / 128)
0.51410274419322177231, // cos(42 * pi / 128)
0.49289819222978414892, // cos(43 * pi / 128)
0.47139673682599780857, // cos(44 * pi / 128)
0.44961132965460659516, // cos(45 * pi / 128)
0.42755509343028219593, // cos(46 * pi / 128)
0.40524131400498980549, // cos(47 * pi / 128)
0.38268343236508983729, // cos(48 * pi / 128)
0.35989503653498827740, // cos(49 * pi / 128)
0.33688985339222005111, // cos(50 * pi / 128)
0.31368174039889151761, // cos(51 * pi / 128)
0.29028467725446227554, // cos(52 * pi / 128)
0.26671275747489842090, // cos(53 * pi / 128)
0.24298017990326398197, // cos(54 * pi / 128)
0.21910124015686976984, // cos(55 * pi / 128)
0.19509032201612830359, // cos(56 * pi / 128)
0.17096188876030135595, // cos(57 * pi / 128)
0.14673047445536174793, // cos(58 * pi / 128)
0.12241067519921627893, // cos(59 * pi / 128)
0.09801714032956077016, // cos(60 * pi / 128)
0.07356456359966745406, // cos(61 * pi / 128)
0.04906767432741813290, // cos(62 * pi / 128)
0.02454122852291226731, // cos(63 * pi / 128)
};
for (i = 0; i < 32; ++i) {
step1[i] = input[stride * i] + input[stride * (63 - i)];
step1[32 + i] = (input[stride * i] -
input[stride * (63 - i)]) * C[i * 2 + 1];
}
dct32_1d(step1, step2, 1);
dct32_1d(step1 + 32, step2 + 32, 1);
for (i = 0; i < 64; i += 2) {
output[stride*i] = step2[i / 2];
}
output[stride * 1] = 2 * step2[32] * C[32];
for (i = 3; i < 64; i += 2) {
output[stride * i] = 2 * step2[32 + i / 2] - output[stride * (i - 2)];
}
}
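dct64_1d builds the 64-point transform out of two 32-point ones via the classic even/odd DCT-II split. Ignoring the per-stage cosine normalizations baked into dct32_1d, the code computes, with $C_k = \cos(k\pi/128)$ as in the table above (a sketch of the identity, not a normative derivation):

  $u_n = x_n + x_{63-n}, \qquad v_n = (x_n - x_{63-n})\,C_{2n+1}, \qquad n = 0,\dots,31$
  $X_{2k} = \mathrm{DCT32}(u)_k$
  $X_{2k+1} = 2\,\mathrm{DCT32}(v)_k - X_{2k-1}, \qquad X_1 \propto \mathrm{DCT32}(v)_0$

so the final loop recovers every odd coefficient from a two-term recurrence instead of a full 64-point butterfly.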
void vp9_fdct64x64_c(const int16_t *input, tran_low_t *out, int stride) {
// vp9_clear_system_state();  // Make it SIMD-safe: __asm emms;
{
int i, j;
double output[4096];
// First transform columns
for (i = 0; i < 64; i++) {
double temp_in[64], temp_out[64];
for (j = 0; j < 64; j++)
temp_in[j] = input[j * stride + i];
dct64_1d(temp_in, temp_out, 1);
for (j = 0; j < 64; j++)
output[j * 64 + i] = temp_out[j];
}
// Then transform rows
for (i = 0; i < 64; ++i) {
double temp_in[64], temp_out[64];
for (j = 0; j < 64; ++j)
temp_in[j] = output[j + i * 64];
dct64_1d(temp_in, temp_out, 1);
for (j = 0; j < 64; ++j)
output[j + i * 64] = temp_out[j];
}
// Scale down by 16 so the 64x64 output lands in the range expected downstream.
for (i = 0; i < 4096; i++) {
out[i] = (tran_low_t)round(output[i] / 16);
}
}
// vp9_clear_system_state();  // Make it SIMD-safe: __asm emms;
}
void vp9_fdct64x64_1_c(const int16_t *input, tran_low_t *output, int stride) {
int r, c;
tran_low_t sum = 0;
for (r = 0; r < 64; ++r)
for (c = 0; c < 64; ++c)
sum += input[r * stride + c];
output[0] = sum >> 5;
output[1] = 0;
}
#endif
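As with vp9_fdct32x32_1_c, the _1 variant computes only a scaled DC term (the residual sum shifted down), letting vp9_xform_quant_dc in vp9_encodemb.c below estimate DC-dominant blocks without running the full 64x64 transform.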
#if CONFIG_VP9_HIGHBITDEPTH
void vp9_highbd_fdct4x4_c(const int16_t *input, tran_low_t *output,
int stride) {
@ -1498,4 +1950,15 @@ void vp9_highbd_fdct32x32_rd_c(const int16_t *input, tran_low_t *out,
int stride) {
vp9_fdct32x32_rd_c(input, out, stride);
}
#if CONFIG_TX64X64
void vp9_highbd_fdct64x64_1_c(const int16_t *input, tran_low_t *out,
int stride) {
vp9_fdct64x64_1_c(input, out, stride);
}
void vp9_highbd_fdct64x64_c(const int16_t *input, tran_low_t *out, int stride) {
vp9_fdct64x64_c(input, out, stride);
}
#endif // CONFIG_TX64X64
#endif // CONFIG_VP9_HIGHBITDEPTH
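Because the 64x64 forward transform above works in double precision rather than 16-bit fixed point, the high-bitdepth wrappers can forward directly to the same C routine; no widened arithmetic is needed yet.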

View File

@ -673,11 +673,23 @@ static void update_state(VP9_COMP *cpi, PICK_MODE_CONTEXT *ctx,
// FIXME(rbultje) I'm pretty sure this should go to the end of this block
// (i.e. after the output_enabled)
#if CONFIG_TX64X64
if (bsize < BLOCK_64X64) {
if (bsize < BLOCK_32X32) {
if (bsize < BLOCK_16X16) {
ctx->tx_rd_diff[ALLOW_16X16] = ctx->tx_rd_diff[ALLOW_8X8];
}
ctx->tx_rd_diff[ALLOW_32X32] = ctx->tx_rd_diff[ALLOW_16X16];
}
ctx->tx_rd_diff[ALLOW_64X64] = ctx->tx_rd_diff[ALLOW_32X32];
}
#else
if (bsize < BLOCK_32X32) {
if (bsize < BLOCK_16X16)
ctx->tx_rd_diff[ALLOW_16X16] = ctx->tx_rd_diff[ALLOW_8X8];
ctx->tx_rd_diff[ALLOW_32X32] = ctx->tx_rd_diff[ALLOW_16X16];
}
#endif
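The extra level of copying mirrors the existing pattern one size up: a block smaller than 64x64 can never select TX_64X64, so under ALLOW_64X64 it behaves exactly as under ALLOW_32X32 and the RD delta is inherited rather than recomputed.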
if (is_inter_block(mbmi) && mbmi->sb_type < BLOCK_8X8) {
mbmi->mv[0].as_int = mi->bmi[3].as_mv[0].as_int;
@ -2567,7 +2579,11 @@ static TX_MODE select_tx_mode(const VP9_COMP *cpi) {
if (cpi->mb.e_mbd.lossless)
return ONLY_4X4;
if (cpi->sf.tx_size_search_method == USE_LARGESTALL)
#if CONFIG_TX64X64
return ALLOW_64X64;
#else
return ALLOW_32X32;
#endif
else if (cpi->sf.tx_size_search_method == USE_FULL_RD ||
cpi->sf.tx_size_search_method == USE_TX_8X8)
return TX_MODE_SELECT;
@ -3435,9 +3451,9 @@ static void encode_frame_internal(VP9_COMP *cpi) {
#if CONFIG_VP9_HIGHBITDEPTH
if (cm->use_highbitdepth)
x->fwd_txm4x4 = xd->lossless ? vp9_fwht4x4 : vp9_fdct4x4;
else
x->fwd_txm4x4 = xd->lossless ? vp9_highbd_fwht4x4 : vp9_highbd_fdct4x4;
else
x->fwd_txm4x4 = xd->lossless ? vp9_fwht4x4 : vp9_fdct4x4;
x->highbd_itxm_add = xd->lossless ? vp9_highbd_iwht4x4_add :
vp9_highbd_idct4x4_add;
#else
@ -3612,41 +3628,99 @@ void vp9_encode_frame(VP9_COMP *cpi) {
}
}
#if CONFIG_TX64X64
if (cm->tx_mode == TX_MODE_SELECT) {
int count4x4 = 0;
int count8x8_lp = 0, count8x8_8x8p = 0;
int count4x4_lp = 0;
int count8x8_8x8p = 0, count8x8_lp = 0;
int count16x16_16x16p = 0, count16x16_lp = 0;
int count32x32 = 0;
int count32x32_32x32p = 0, count32x32_lp = 0;
int count64x64_64x64p = 0;
for (i = 0; i < TX_SIZE_CONTEXTS; ++i) {
count4x4 += cm->counts.tx.p32x32[i][TX_4X4];
count4x4 += cm->counts.tx.p16x16[i][TX_4X4];
count4x4 += cm->counts.tx.p8x8[i][TX_4X4];
count4x4_lp += cm->counts.tx.p64x64[i][TX_4X4];
count4x4_lp += cm->counts.tx.p32x32[i][TX_4X4];
count4x4_lp += cm->counts.tx.p16x16[i][TX_4X4];
count4x4_lp += cm->counts.tx.p8x8[i][TX_4X4];
count8x8_lp += cm->counts.tx.p64x64[i][TX_8X8];
count8x8_lp += cm->counts.tx.p32x32[i][TX_8X8];
count8x8_lp += cm->counts.tx.p16x16[i][TX_8X8];
count8x8_8x8p += cm->counts.tx.p8x8[i][TX_8X8];
count16x16_lp += cm->counts.tx.p64x64[i][TX_16X16];
count16x16_lp += cm->counts.tx.p32x32[i][TX_16X16];
count16x16_16x16p += cm->counts.tx.p16x16[i][TX_16X16];
count32x32_lp += cm->counts.tx.p64x64[i][TX_32X32];
count32x32_32x32p += cm->counts.tx.p32x32[i][TX_32X32];
count64x64_64x64p += cm->counts.tx.p64x64[i][TX_64X64];
}
if (count4x4_lp == 0 && count16x16_lp == 0 && count16x16_16x16p == 0 &&
count32x32_lp == 0 && count32x32_32x32p == 0 &&
count64x64_64x64p == 0) {
cm->tx_mode = ALLOW_8X8;
reset_skip_tx_size(cm, TX_8X8);
} else if (count8x8_8x8p == 0 && count8x8_lp == 0 &&
count16x16_16x16p == 0 && count16x16_lp == 0 &&
count32x32_32x32p == 0 && count32x32_lp == 0 &&
count64x64_64x64p == 0) {
cm->tx_mode = ONLY_4X4;
reset_skip_tx_size(cm, TX_4X4);
} else if (count4x4_lp == 0 && count8x8_lp == 0 && count16x16_lp == 0 &&
count32x32_lp == 0) {
cm->tx_mode = ALLOW_64X64;
} else if (count4x4_lp == 0 && count8x8_lp == 0 && count16x16_lp == 0 &&
count64x64_64x64p == 0) {
cm->tx_mode = ALLOW_32X32;
reset_skip_tx_size(cm, TX_32X32);
} else if (count4x4_lp == 0 && count8x8_lp == 0 &&
count32x32_lp == 0 && count32x32_32x32p == 0 &&
count64x64_64x64p == 0) {
cm->tx_mode = ALLOW_16X16;
reset_skip_tx_size(cm, TX_16X16);
}
}
#else
if (cm->tx_mode == TX_MODE_SELECT) {
int count4x4_lp = 0;
int count8x8_8x8p = 0, count8x8_lp = 0;
int count16x16_16x16p = 0, count16x16_lp = 0;
int count32x32_32x32p = 0;
for (i = 0; i < TX_SIZE_CONTEXTS; ++i) {
count4x4_lp += cm->counts.tx.p32x32[i][TX_4X4];
count4x4_lp += cm->counts.tx.p16x16[i][TX_4X4];
count4x4_lp += cm->counts.tx.p8x8[i][TX_4X4];
count8x8_lp += cm->counts.tx.p32x32[i][TX_8X8];
count8x8_lp += cm->counts.tx.p16x16[i][TX_8X8];
count8x8_8x8p += cm->counts.tx.p8x8[i][TX_8X8];
count16x16_16x16p += cm->counts.tx.p16x16[i][TX_16X16];
count16x16_lp += cm->counts.tx.p32x32[i][TX_16X16];
count32x32 += cm->counts.tx.p32x32[i][TX_32X32];
count16x16_16x16p += cm->counts.tx.p16x16[i][TX_16X16];
count32x32_32x32p += cm->counts.tx.p32x32[i][TX_32X32];
}
if (count4x4 == 0 && count16x16_lp == 0 && count16x16_16x16p == 0 &&
count32x32 == 0) {
if (count4x4_lp == 0 && count16x16_lp == 0 && count16x16_16x16p == 0 &&
count32x32_32x32p == 0) {
cm->tx_mode = ALLOW_8X8;
reset_skip_tx_size(cm, TX_8X8);
} else if (count8x8_8x8p == 0 && count16x16_16x16p == 0 &&
count8x8_lp == 0 && count16x16_lp == 0 && count32x32 == 0) {
count8x8_lp == 0 && count16x16_lp == 0 &&
count32x32_32x32p == 0) {
cm->tx_mode = ONLY_4X4;
reset_skip_tx_size(cm, TX_4X4);
} else if (count8x8_lp == 0 && count16x16_lp == 0 && count4x4 == 0) {
} else if (count8x8_lp == 0 && count16x16_lp == 0 && count4x4_lp == 0) {
cm->tx_mode = ALLOW_32X32;
} else if (count32x32 == 0 && count8x8_lp == 0 && count4x4 == 0) {
} else if (count32x32_32x32p == 0 && count8x8_lp == 0 &&
count4x4_lp == 0) {
cm->tx_mode = ALLOW_16X16;
reset_skip_tx_size(cm, TX_16X16);
}
}
#endif
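The widened bookkeeping lets the encoder retire TX_MODE_SELECT when the per-size counts show it carries no information. For example (a hypothetical count profile): if every coded block used the largest transform its size permits, so all *_lp counters are zero, and at least one block reached 64x64, the header is rewritten to ALLOW_64X64 and the per-block tx_size bits disappear; if count64x64_64x64p is also zero, ALLOW_32X32 is chosen instead and reset_skip_tx_size caps any stale skip-block sizes at TX_32X32.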
} else {
cm->reference_mode = SINGLE_REFERENCE;
encode_frame_internal(cpi);

View File

@ -135,16 +135,16 @@ static int optimize_b(MACROBLOCK *mb, int plane, int block,
struct macroblock_plane *const p = &mb->plane[plane];
struct macroblockd_plane *const pd = &xd->plane[plane];
const int ref = is_inter_block(&xd->mi[0].src_mi->mbmi);
vp9_token_state tokens[1025][2];
unsigned best_index[1025][2];
uint8_t token_cache[1024];
vp9_token_state tokens[MAX_NUM_COEFS + 1][2];
unsigned best_index[MAX_NUM_COEFS + 1][2];
uint8_t token_cache[MAX_NUM_COEFS];
const tran_low_t *const coeff = BLOCK_OFFSET(mb->plane[plane].coeff, block);
tran_low_t *const qcoeff = BLOCK_OFFSET(p->qcoeff, block);
tran_low_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);
const int eob = p->eobs[block];
const PLANE_TYPE type = pd->plane_type;
const int default_eob = 16 << (tx_size << 1);
const int mul = 1 + (tx_size == TX_32X32);
const int mul = 1 << (tx_size >= TX_32X32 ? tx_size - TX_16X16 : 0);
const int16_t *dequant_ptr = pd->dequant;
const uint8_t *const band_translate = get_band_translate(tx_size);
const scan_order *const so = get_scan(xd, tx_size, type, block);
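Generalizing mul keeps the trellis on the same coefficient scale as the quantizer: the expression evaluates to 1 for sizes up to TX_16X16, 2 for TX_32X32 and 4 for TX_64X64, mirroring the dqcoeff = qcoeff * dequant / (2 << logsizeby32) scaling used by the big-transform quantizers later in this commit.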
@ -392,6 +392,16 @@ void vp9_xform_quant_fp(MACROBLOCK *x, int plane, int block,
#if CONFIG_VP9_HIGHBITDEPTH
if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
switch (tx_size) {
#if CONFIG_TX64X64
case TX_64X64:
vp9_highbd_fdct64x64(src_diff, coeff, diff_stride);
vp9_highbd_quantize_fp_64x64(coeff, 4096, x->skip_block, p->zbin,
p->round_fp, p->quant_fp, p->quant_shift,
qcoeff, dqcoeff, pd->dequant,
p->zbin_extra, eob, scan_order->scan,
scan_order->iscan);
break;
#endif // CONFIG_TX64X64
case TX_32X32:
highbd_fdct32x32(x->use_lp32x32fdct, src_diff, coeff, diff_stride);
vp9_highbd_quantize_fp_32x32(coeff, 1024, x->skip_block, p->zbin,
@ -429,6 +439,15 @@ void vp9_xform_quant_fp(MACROBLOCK *x, int plane, int block,
#endif // CONFIG_VP9_HIGHBITDEPTH
switch (tx_size) {
#if CONFIG_TX64X64
case TX_64X64:
vp9_fdct64x64(src_diff, coeff, diff_stride);
vp9_quantize_fp_64x64(coeff, 4096, x->skip_block, p->zbin, p->round_fp,
p->quant_fp, p->quant_shift, qcoeff, dqcoeff,
pd->dequant, p->zbin_extra, eob, scan_order->scan,
scan_order->iscan);
break;
#endif // CONFIG_TX64X64
case TX_32X32:
fdct32x32(x->use_lp32x32fdct, src_diff, coeff, diff_stride);
vp9_quantize_fp_32x32(coeff, 1024, x->skip_block, p->zbin, p->round_fp,
@ -482,6 +501,14 @@ void vp9_xform_quant_dc(MACROBLOCK *x, int plane, int block,
#if CONFIG_VP9_HIGHBITDEPTH
if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
switch (tx_size) {
#if CONFIG_TX64X64
case TX_64X64:
vp9_highbd_fdct64x64_1(src_diff, coeff, diff_stride);
vp9_highbd_quantize_dc_64x64(coeff, x->skip_block, p->round,
p->quant_fp[0], qcoeff, dqcoeff,
pd->dequant[0], eob);
break;
#endif // CONFIG_TX64X64
case TX_32X32:
vp9_highbd_fdct32x32_1(src_diff, coeff, diff_stride);
vp9_highbd_quantize_dc_32x32(coeff, x->skip_block, p->round,
@ -514,6 +541,14 @@ void vp9_xform_quant_dc(MACROBLOCK *x, int plane, int block,
#endif // CONFIG_VP9_HIGHBITDEPTH
switch (tx_size) {
#if CONFIG_TX64X64
case TX_64X64:
vp9_fdct64x64_1(src_diff, coeff, diff_stride);
vp9_quantize_dc_64x64(coeff, x->skip_block, p->round,
p->quant_fp[0], qcoeff, dqcoeff,
pd->dequant[0], eob);
break;
#endif // CONFIG_TX64X64
case TX_32X32:
vp9_fdct32x32_1(src_diff, coeff, diff_stride);
vp9_quantize_dc_32x32(coeff, x->skip_block, p->round,
@ -563,6 +598,15 @@ void vp9_xform_quant(MACROBLOCK *x, int plane, int block,
#if CONFIG_VP9_HIGHBITDEPTH
if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
switch (tx_size) {
#if CONFIG_TX64X64
case TX_64X64:
vp9_highbd_fdct64x64(src_diff, coeff, diff_stride);
vp9_highbd_quantize_b_64x64(coeff, 4096, x->skip_block, p->zbin,
p->round, p->quant, p->quant_shift, qcoeff,
dqcoeff, pd->dequant, p->zbin_extra, eob,
scan_order->scan, scan_order->iscan);
break;
#endif // CONFIG_TX64X64
case TX_32X32:
highbd_fdct32x32(x->use_lp32x32fdct, src_diff, coeff, diff_stride);
vp9_highbd_quantize_b_32x32(coeff, 1024, x->skip_block, p->zbin,
@ -599,6 +643,15 @@ void vp9_xform_quant(MACROBLOCK *x, int plane, int block,
#endif // CONFIG_VP9_HIGHBITDEPTH
switch (tx_size) {
#if CONFIG_TX64X64
case TX_64X64:
vp9_fdct64x64(src_diff, coeff, diff_stride);
vp9_quantize_b_64x64(coeff, 4096, x->skip_block, p->zbin, p->round,
p->quant, p->quant_shift, qcoeff, dqcoeff,
pd->dequant, p->zbin_extra, eob, scan_order->scan,
scan_order->iscan);
break;
#endif // CONFIG_TX64X64
case TX_32X32:
fdct32x32(x->use_lp32x32fdct, src_diff, coeff, diff_stride);
vp9_quantize_b_32x32(coeff, 1024, x->skip_block, p->zbin, p->round,
@ -649,6 +702,7 @@ static void encode_block(int plane, int block, BLOCK_SIZE plane_bsize,
dst = &pd->dst.buf[4 * j * pd->dst.stride + 4 * i];
a = &ctx->ta[plane][i];
l = &ctx->tl[plane][j];
#if CONFIG_TX64X64
if (plane) assert(tx_size != TX_64X64);
#endif  // CONFIG_TX64X64
// TODO(jingning): per transformed block zero forcing only enabled for
// luma component. will integrate chroma components as well.
@ -695,6 +749,12 @@ static void encode_block(int plane, int block, BLOCK_SIZE plane_bsize,
#if CONFIG_VP9_HIGHBITDEPTH
if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
switch (tx_size) {
#if CONFIG_TX64X64
case TX_64X64:
vp9_highbd_idct64x64_add(dqcoeff, dst, pd->dst.stride,
p->eobs[block], xd->bd);
break;
#endif // CONFIG_TX64X64
case TX_32X32:
vp9_highbd_idct32x32_add(dqcoeff, dst, pd->dst.stride,
p->eobs[block], xd->bd);
@ -722,6 +782,11 @@ static void encode_block(int plane, int block, BLOCK_SIZE plane_bsize,
#endif // CONFIG_VP9_HIGHBITDEPTH
switch (tx_size) {
#if CONFIG_TX64X64
case TX_64X64:
vp9_idct64x64_add(dqcoeff, dst, pd->dst.stride, p->eobs[block]);
break;
#endif
case TX_32X32:
vp9_idct32x32_add(dqcoeff, dst, pd->dst.stride, p->eobs[block]);
break;
@ -832,6 +897,29 @@ static void encode_block_intra(int plane, int block, BLOCK_SIZE plane_bsize,
#if CONFIG_VP9_HIGHBITDEPTH
if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
switch (tx_size) {
#if CONFIG_TX64X64
case TX_64X64:
scan_order = &vp9_default_scan_orders[TX_64X64];
mode = plane == 0 ? mbmi->mode : mbmi->uv_mode;
vp9_predict_intra_block(xd, block >> 8, bwl, TX_64X64, mode,
x->skip_encode ? src : dst,
x->skip_encode ? src_stride : dst_stride,
dst, dst_stride, i, j, plane);
if (!x->skip_recode) {
vp9_highbd_subtract_block(64, 64, src_diff, diff_stride,
src, src_stride, dst, dst_stride, xd->bd);
vp9_highbd_fdct64x64(src_diff, coeff, diff_stride);
vp9_highbd_quantize_b_64x64(coeff, 4096, x->skip_block, p->zbin,
p->round, p->quant, p->quant_shift,
qcoeff, dqcoeff, pd->dequant,
p->zbin_extra, eob,
scan_order->scan, scan_order->iscan);
if (!x->skip_encode && *eob) {
vp9_highbd_idct64x64_add(dqcoeff, dst, dst_stride, *eob, xd->bd);
}
}
break;
#endif // CONFIG_TX64X64
case TX_32X32:
scan_order = &vp9_default_scan_orders[TX_32X32];
mode = plane == 0 ? mbmi->mode : mbmi->uv_mode;
@ -941,6 +1029,28 @@ static void encode_block_intra(int plane, int block, BLOCK_SIZE plane_bsize,
#endif // CONFIG_VP9_HIGHBITDEPTH
switch (tx_size) {
#if CONFIG_TX64X64
case TX_64X64:
assert(plane == 0);
scan_order = &vp9_default_scan_orders[TX_64X64];
mode = plane == 0 ? mbmi->mode : mbmi->uv_mode;
vp9_predict_intra_block(xd, block >> 8, bwl, TX_64X64, mode,
x->skip_encode ? src : dst,
x->skip_encode ? src_stride : dst_stride,
dst, dst_stride, i, j, plane);
if (!x->skip_recode) {
vp9_subtract_block(64, 64, src_diff, diff_stride,
src, src_stride, dst, dst_stride);
vp9_fdct64x64(src_diff, coeff, diff_stride);
vp9_quantize_b_64x64(coeff, 4096, x->skip_block, p->zbin, p->round,
p->quant, p->quant_shift, qcoeff, dqcoeff,
pd->dequant, p->zbin_extra, eob, scan_order->scan,
scan_order->iscan);
}
if (!x->skip_encode && *eob)
vp9_idct64x64_add(dqcoeff, dst, dst_stride, *eob);
break;
#endif // CONFIG_TX64X64
case TX_32X32:
scan_order = &vp9_default_scan_orders[TX_32X32];
mode = plane == 0 ? mbmi->mode : mbmi->uv_mode;

View File

@ -3120,7 +3120,7 @@ static void encode_frame_to_data_rate(VP9_COMP *cpi,
release_scaled_references(cpi);
vp9_update_reference_frames(cpi);
for (t = TX_4X4; t <= TX_32X32; t++)
for (t = TX_4X4; t < TX_SIZES; t++)
full_to_model_counts(cm->counts.coef[t], cpi->coef_counts[t]);
if (!cm->error_resilient_mode && !cm->frame_parallel_decoding_mode)

View File

@ -65,10 +65,15 @@ void vp9_highbd_quantize_dc(const tran_low_t *coeff_ptr, int skip_block,
}
#endif
void vp9_quantize_dc_32x32(const tran_low_t *coeff_ptr, int skip_block,
const int16_t *round_ptr, const int16_t quant,
tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr,
const int16_t dequant_ptr, uint16_t *eob_ptr) {
static INLINE void quantize_dc_bigtx(const tran_low_t *coeff_ptr,
int skip_block,
const int16_t *round_ptr,
const int16_t quant,
tran_low_t *qcoeff_ptr,
tran_low_t *dqcoeff_ptr,
const int16_t dequant_ptr,
uint16_t *eob_ptr,
int logsizeby32) {
const int rc = 0;
const int coeff = coeff_ptr[rc];
const int coeff_sign = (coeff >> 31);
@ -78,24 +83,43 @@ void vp9_quantize_dc_32x32(const tran_low_t *coeff_ptr, int skip_block,
if (!skip_block) {
tmp = clamp(abs_coeff + round_ptr[rc != 0], INT16_MIN, INT16_MAX);
tmp = (tmp * quant) >> 15;
tmp = (tmp * quant) >> (15 - logsizeby32);
qcoeff_ptr[rc] = (tmp ^ coeff_sign) - coeff_sign;
dqcoeff_ptr[rc] = qcoeff_ptr[rc] * dequant_ptr / 2;
dqcoeff_ptr[rc] = qcoeff_ptr[rc] * dequant_ptr / (2 << logsizeby32);
if (tmp)
eob = 0;
}
*eob_ptr = eob + 1;
}
void vp9_quantize_dc_32x32(const tran_low_t *coeff_ptr, int skip_block,
const int16_t *round_ptr, const int16_t quant,
tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr,
const int16_t dequant_ptr, uint16_t *eob_ptr) {
quantize_dc_bigtx(coeff_ptr, skip_block, round_ptr, quant,
qcoeff_ptr, dqcoeff_ptr, dequant_ptr, eob_ptr, 0);
}
#if CONFIG_TX64X64
void vp9_quantize_dc_64x64(const tran_low_t *coeff_ptr, int skip_block,
const int16_t *round_ptr, const int16_t quant,
tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr,
const int16_t dequant_ptr, uint16_t *eob_ptr) {
quantize_dc_bigtx(coeff_ptr, skip_block, round_ptr, quant,
qcoeff_ptr, dqcoeff_ptr, dequant_ptr, eob_ptr, 1);
}
#endif // CONFIG_TX64X64
#if CONFIG_VP9_HIGHBITDEPTH
void vp9_highbd_quantize_dc_32x32(const tran_low_t *coeff_ptr,
int skip_block,
const int16_t *round_ptr,
const int16_t quant,
tran_low_t *qcoeff_ptr,
tran_low_t *dqcoeff_ptr,
const int16_t dequant_ptr,
uint16_t *eob_ptr) {
static INLINE void highbd_quantize_dc_bigtx(const tran_low_t *coeff_ptr,
int skip_block,
const int16_t *round_ptr,
const int16_t quant,
tran_low_t *qcoeff_ptr,
tran_low_t *dqcoeff_ptr,
const int16_t dequant_ptr,
uint16_t *eob_ptr,
int logsizeby32) {
int eob = -1;
if (!skip_block) {
@ -106,15 +130,41 @@ void vp9_highbd_quantize_dc_32x32(const tran_low_t *coeff_ptr,
const int64_t tmp =
(clamp(abs_coeff + round_ptr[rc != 0], INT32_MIN, INT32_MAX) *
quant) >> 15;
quant) >> (15 - logsizeby32);
qcoeff_ptr[rc] = (tmp ^ coeff_sign) - coeff_sign;
dqcoeff_ptr[rc] = qcoeff_ptr[rc] * dequant_ptr / 2;
dqcoeff_ptr[rc] = qcoeff_ptr[rc] * dequant_ptr / (2 << logsizeby32);
if (tmp)
eob = 0;
}
*eob_ptr = eob + 1;
}
#endif
void vp9_highbd_quantize_dc_32x32(const tran_low_t *coeff_ptr,
int skip_block,
const int16_t *round_ptr,
const int16_t quant,
tran_low_t *qcoeff_ptr,
tran_low_t *dqcoeff_ptr,
const int16_t dequant_ptr,
uint16_t *eob_ptr) {
highbd_quantize_dc_bigtx(coeff_ptr, skip_block, round_ptr, quant,
qcoeff_ptr, dqcoeff_ptr, dequant_ptr, eob_ptr, 0);
}
#if CONFIG_TX64X64
void vp9_highbd_quantize_dc_64x64(const tran_low_t *coeff_ptr,
int skip_block,
const int16_t *round_ptr,
const int16_t quant,
tran_low_t *qcoeff_ptr,
tran_low_t *dqcoeff_ptr,
const int16_t dequant_ptr,
uint16_t *eob_ptr) {
highbd_quantize_dc_bigtx(coeff_ptr, skip_block, round_ptr, quant,
qcoeff_ptr, dqcoeff_ptr, dequant_ptr, eob_ptr, 1);
}
#endif // CONFIG_TX64X64
#endif // CONFIG_VP9_HIGHBITDEPTH
void vp9_quantize_fp_c(const tran_low_t *coeff_ptr, intptr_t n_coeffs,
int skip_block,
@ -210,15 +260,21 @@ void vp9_highbd_quantize_fp_c(const tran_low_t *coeff_ptr,
// TODO(jingning) Refactor this file and combine functions with similar
// operations.
void vp9_quantize_fp_32x32_c(const tran_low_t *coeff_ptr, intptr_t n_coeffs,
int skip_block,
const int16_t *zbin_ptr, const int16_t *round_ptr,
const int16_t *quant_ptr,
const int16_t *quant_shift_ptr,
tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr,
const int16_t *dequant_ptr,
int zbin_oq_value, uint16_t *eob_ptr,
const int16_t *scan, const int16_t *iscan) {
static INLINE void quantize_fp_bigtx(const tran_low_t *coeff_ptr,
intptr_t n_coeffs,
int skip_block,
const int16_t *zbin_ptr,
const int16_t *round_ptr,
const int16_t *quant_ptr,
const int16_t *quant_shift_ptr,
tran_low_t *qcoeff_ptr,
tran_low_t *dqcoeff_ptr,
const int16_t *dequant_ptr,
int zbin_oq_value,
uint16_t *eob_ptr,
const int16_t *scan,
const int16_t *iscan,
int logsizeby32) {
int i, eob = -1;
(void)zbin_ptr;
(void)quant_shift_ptr;
@ -236,12 +292,13 @@ void vp9_quantize_fp_32x32_c(const tran_low_t *coeff_ptr, intptr_t n_coeffs,
int tmp = 0;
int abs_coeff = (coeff ^ coeff_sign) - coeff_sign;
if (abs_coeff >= (dequant_ptr[rc != 0] >> 2)) {
if (abs_coeff >= (dequant_ptr[rc != 0] >> (2 + logsizeby32))) {
abs_coeff += ROUND_POWER_OF_TWO(round_ptr[rc != 0], 1);
abs_coeff = clamp(abs_coeff, INT16_MIN, INT16_MAX);
tmp = (abs_coeff * quant_ptr[rc != 0]) >> 15;
tmp = (abs_coeff * quant_ptr[rc != 0]) >> (15 - logsizeby32);
qcoeff_ptr[rc] = (tmp ^ coeff_sign) - coeff_sign;
dqcoeff_ptr[rc] = qcoeff_ptr[rc] * dequant_ptr[rc != 0] / 2;
dqcoeff_ptr[rc] = qcoeff_ptr[rc] * dequant_ptr[rc != 0] /
(2 << logsizeby32);
}
if (tmp)
@ -251,18 +308,64 @@ void vp9_quantize_fp_32x32_c(const tran_low_t *coeff_ptr, intptr_t n_coeffs,
*eob_ptr = eob + 1;
}
void vp9_quantize_fp_32x32_c(const tran_low_t *coeff_ptr,
intptr_t n_coeffs,
int skip_block,
const int16_t *zbin_ptr,
const int16_t *round_ptr,
const int16_t *quant_ptr,
const int16_t *quant_shift_ptr,
tran_low_t *qcoeff_ptr,
tran_low_t *dqcoeff_ptr,
const int16_t *dequant_ptr,
int zbin_oq_value,
uint16_t *eob_ptr,
const int16_t *scan,
const int16_t *iscan) {
quantize_fp_bigtx(coeff_ptr, n_coeffs, skip_block,
zbin_ptr, round_ptr, quant_ptr, quant_shift_ptr,
qcoeff_ptr, dqcoeff_ptr, dequant_ptr,
zbin_oq_value, eob_ptr, scan, iscan, 0);
}
#if CONFIG_TX64X64
void vp9_quantize_fp_64x64_c(const tran_low_t *coeff_ptr,
intptr_t n_coeffs,
int skip_block,
const int16_t *zbin_ptr,
const int16_t *round_ptr,
const int16_t *quant_ptr,
const int16_t *quant_shift_ptr,
tran_low_t *qcoeff_ptr,
tran_low_t *dqcoeff_ptr,
const int16_t *dequant_ptr,
int zbin_oq_value,
uint16_t *eob_ptr,
const int16_t *scan,
const int16_t *iscan) {
quantize_fp_bigtx(coeff_ptr, n_coeffs, skip_block,
zbin_ptr, round_ptr, quant_ptr, quant_shift_ptr,
qcoeff_ptr, dqcoeff_ptr, dequant_ptr,
zbin_oq_value, eob_ptr, scan, iscan, 1);
}
#endif // CONFIG_TX64X64
#if CONFIG_VP9_HIGHBITDEPTH
void vp9_highbd_quantize_fp_32x32_c(const tran_low_t *coeff_ptr,
intptr_t n_coeffs, int skip_block,
const int16_t *zbin_ptr,
const int16_t *round_ptr,
const int16_t *quant_ptr,
const int16_t *quant_shift_ptr,
tran_low_t *qcoeff_ptr,
tran_low_t *dqcoeff_ptr,
const int16_t *dequant_ptr,
int zbin_oq_value, uint16_t *eob_ptr,
const int16_t *scan, const int16_t *iscan) {
static INLINE void highbd_quantize_fp_bigtx(const tran_low_t *coeff_ptr,
intptr_t n_coeffs,
int skip_block,
const int16_t *zbin_ptr,
const int16_t *round_ptr,
const int16_t *quant_ptr,
const int16_t *quant_shift_ptr,
tran_low_t *qcoeff_ptr,
tran_low_t *dqcoeff_ptr,
const int16_t *dequant_ptr,
int zbin_oq_value,
uint16_t *eob_ptr,
const int16_t *scan,
const int16_t *iscan,
int logsizeby32) {
int i, eob = -1;
(void)zbin_ptr;
(void)quant_shift_ptr;
@ -280,12 +383,13 @@ void vp9_highbd_quantize_fp_32x32_c(const tran_low_t *coeff_ptr,
int64_t tmp = 0;
const int abs_coeff = (coeff ^ coeff_sign) - coeff_sign;
if (abs_coeff >= (dequant_ptr[rc != 0] >> 2)) {
if (abs_coeff >= (dequant_ptr[rc != 0] >> (2 + logsizeby32))) {
tmp = clamp(abs_coeff + ROUND_POWER_OF_TWO(round_ptr[rc != 0], 1),
INT32_MIN, INT32_MAX);
tmp = (tmp * quant_ptr[rc != 0]) >> 15;
tmp = (tmp * quant_ptr[rc != 0]) >> (15 - logsizeby32);
qcoeff_ptr[rc] = (tmp ^ coeff_sign) - coeff_sign;
dqcoeff_ptr[rc] = qcoeff_ptr[rc] * dequant_ptr[rc != 0] / 2;
dqcoeff_ptr[rc] = qcoeff_ptr[rc] * dequant_ptr[rc != 0] /
(2 << logsizeby32);
}
if (tmp)
@ -294,7 +398,49 @@ void vp9_highbd_quantize_fp_32x32_c(const tran_low_t *coeff_ptr,
}
*eob_ptr = eob + 1;
}
#endif
void vp9_highbd_quantize_fp_32x32_c(const tran_low_t *coeff_ptr,
intptr_t n_coeffs,
int skip_block,
const int16_t *zbin_ptr,
const int16_t *round_ptr,
const int16_t *quant_ptr,
const int16_t *quant_shift_ptr,
tran_low_t *qcoeff_ptr,
tran_low_t *dqcoeff_ptr,
const int16_t *dequant_ptr,
int zbin_oq_value,
uint16_t *eob_ptr,
const int16_t *scan,
const int16_t *iscan) {
highbd_quantize_fp_bigtx(coeff_ptr, n_coeffs, skip_block,
zbin_ptr, round_ptr, quant_ptr, quant_shift_ptr,
qcoeff_ptr, dqcoeff_ptr, dequant_ptr,
zbin_oq_value, eob_ptr, scan, iscan, 0);
}
#if CONFIG_TX64X64
void vp9_highbd_quantize_fp_64x64_c(const tran_low_t *coeff_ptr,
intptr_t n_coeffs,
int skip_block,
const int16_t *zbin_ptr,
const int16_t *round_ptr,
const int16_t *quant_ptr,
const int16_t *quant_shift_ptr,
tran_low_t *qcoeff_ptr,
tran_low_t *dqcoeff_ptr,
const int16_t *dequant_ptr,
int zbin_oq_value,
uint16_t *eob_ptr,
const int16_t *scan,
const int16_t *iscan) {
highbd_quantize_fp_bigtx(coeff_ptr, n_coeffs, skip_block,
zbin_ptr, round_ptr, quant_ptr, quant_shift_ptr,
qcoeff_ptr, dqcoeff_ptr, dequant_ptr,
zbin_oq_value, eob_ptr, scan, iscan, 1);
}
#endif // CONFIG_TX64X64
#endif // CONFIG_VP9_HIGHBITDEPTH
void vp9_quantize_b_c(const tran_low_t *coeff_ptr, intptr_t n_coeffs,
int skip_block,
@ -403,23 +549,29 @@ void vp9_highbd_quantize_b_c(const tran_low_t *coeff_ptr, intptr_t n_coeffs,
}
*eob_ptr = eob + 1;
}
#endif
#endif // CONFIG_VP9_HIGHBITDEPTH
void vp9_quantize_b_32x32_c(const tran_low_t *coeff_ptr, intptr_t n_coeffs,
int skip_block,
const int16_t *zbin_ptr, const int16_t *round_ptr,
const int16_t *quant_ptr,
const int16_t *quant_shift_ptr,
tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr,
const int16_t *dequant_ptr,
int zbin_oq_value, uint16_t *eob_ptr,
const int16_t *scan, const int16_t *iscan) {
static INLINE void quantize_b_bigtx(const tran_low_t *coeff_ptr,
intptr_t n_coeffs,
int skip_block,
const int16_t *zbin_ptr,
const int16_t *round_ptr,
const int16_t *quant_ptr,
const int16_t *quant_shift_ptr,
tran_low_t *qcoeff_ptr,
tran_low_t *dqcoeff_ptr,
const int16_t *dequant_ptr,
int zbin_oq_value,
uint16_t *eob_ptr,
const int16_t *scan,
const int16_t *iscan,
int logsizeby32) {
const int zbins[2] = { ROUND_POWER_OF_TWO(zbin_ptr[0] + zbin_oq_value, 1),
ROUND_POWER_OF_TWO(zbin_ptr[1] + zbin_oq_value, 1) };
const int nzbins[2] = {zbins[0] * -1, zbins[1] * -1};
int idx = 0;
int idx_arr[1024];
int idx_arr[MAX_NUM_COEFS];
int i, eob = -1;
(void)iscan;
@ -446,13 +598,14 @@ void vp9_quantize_b_32x32_c(const tran_low_t *coeff_ptr, intptr_t n_coeffs,
const int coeff_sign = (coeff >> 31);
int tmp;
int abs_coeff = (coeff ^ coeff_sign) - coeff_sign;
abs_coeff += ROUND_POWER_OF_TWO(round_ptr[rc != 0], 1);
abs_coeff += ROUND_POWER_OF_TWO(round_ptr[rc != 0], (1 + logsizeby32));
abs_coeff = clamp(abs_coeff, INT16_MIN, INT16_MAX);
tmp = ((((abs_coeff * quant_ptr[rc != 0]) >> 16) + abs_coeff) *
quant_shift_ptr[rc != 0]) >> 15;
quant_shift_ptr[rc != 0]) >> (15 - logsizeby32);
qcoeff_ptr[rc] = (tmp ^ coeff_sign) - coeff_sign;
dqcoeff_ptr[rc] = qcoeff_ptr[rc] * dequant_ptr[rc != 0] / 2;
dqcoeff_ptr[rc] = qcoeff_ptr[rc] * dequant_ptr[rc != 0] /
(2 << logsizeby32);
if (tmp)
eob = idx_arr[i];
@ -461,24 +614,70 @@ void vp9_quantize_b_32x32_c(const tran_low_t *coeff_ptr, intptr_t n_coeffs,
*eob_ptr = eob + 1;
}
void vp9_quantize_b_32x32_c(const tran_low_t *coeff_ptr,
intptr_t n_coeffs,
int skip_block,
const int16_t *zbin_ptr,
const int16_t *round_ptr,
const int16_t *quant_ptr,
const int16_t *quant_shift_ptr,
tran_low_t *qcoeff_ptr,
tran_low_t *dqcoeff_ptr,
const int16_t *dequant_ptr,
int zbin_oq_value,
uint16_t *eob_ptr,
const int16_t *scan,
const int16_t *iscan) {
quantize_b_bigtx(coeff_ptr, n_coeffs, skip_block,
zbin_ptr, round_ptr, quant_ptr, quant_shift_ptr,
qcoeff_ptr, dqcoeff_ptr, dequant_ptr,
zbin_oq_value, eob_ptr, scan, iscan, 0);
}
#if CONFIG_TX64X64
void vp9_quantize_b_64x64_c(const tran_low_t *coeff_ptr,
intptr_t n_coeffs,
int skip_block,
const int16_t *zbin_ptr,
const int16_t *round_ptr,
const int16_t *quant_ptr,
const int16_t *quant_shift_ptr,
tran_low_t *qcoeff_ptr,
tran_low_t *dqcoeff_ptr,
const int16_t *dequant_ptr,
int zbin_oq_value,
uint16_t *eob_ptr,
const int16_t *scan,
const int16_t *iscan) {
quantize_b_bigtx(coeff_ptr, n_coeffs, skip_block,
zbin_ptr, round_ptr, quant_ptr, quant_shift_ptr,
qcoeff_ptr, dqcoeff_ptr, dequant_ptr,
zbin_oq_value, eob_ptr, scan, iscan, 1);
}
#endif // CONFIG_TX64X64
#if CONFIG_VP9_HIGHBITDEPTH
void vp9_highbd_quantize_b_32x32_c(const tran_low_t *coeff_ptr,
intptr_t n_coeffs, int skip_block,
const int16_t *zbin_ptr,
const int16_t *round_ptr,
const int16_t *quant_ptr,
const int16_t *quant_shift_ptr,
tran_low_t *qcoeff_ptr,
tran_low_t *dqcoeff_ptr,
const int16_t *dequant_ptr,
int zbin_oq_value, uint16_t *eob_ptr,
const int16_t *scan, const int16_t *iscan) {
static INLINE void highbd_quantize_b_bigtx(const tran_low_t *coeff_ptr,
intptr_t n_coeffs,
int skip_block,
const int16_t *zbin_ptr,
const int16_t *round_ptr,
const int16_t *quant_ptr,
const int16_t *quant_shift_ptr,
tran_low_t *qcoeff_ptr,
tran_low_t *dqcoeff_ptr,
const int16_t *dequant_ptr,
int zbin_oq_value,
uint16_t *eob_ptr,
const int16_t *scan,
const int16_t *iscan,
int logsizeby32) {
const int zbins[2] = { ROUND_POWER_OF_TWO(zbin_ptr[0] + zbin_oq_value, 1),
ROUND_POWER_OF_TWO(zbin_ptr[1] + zbin_oq_value, 1) };
const int nzbins[2] = { zbins[0] * -1, zbins[1] * -1 };
int idx = 0;
int idx_arr[1024];
int idx_arr[MAX_NUM_COEFS];
int i, eob = -1;
(void)iscan;
@ -504,14 +703,15 @@ void vp9_highbd_quantize_b_32x32_c(const tran_low_t *coeff_ptr,
const int coeff = coeff_ptr[rc];
const int coeff_sign = (coeff >> 31);
const int abs_coeff = (coeff ^ coeff_sign) - coeff_sign;
int64_t tmp = clamp(abs_coeff +
ROUND_POWER_OF_TWO(round_ptr[rc != 0], 1),
INT32_MIN, INT32_MAX);
int64_t tmp = clamp(
abs_coeff + ROUND_POWER_OF_TWO(round_ptr[rc != 0], (1 + logsizeby32)),
INT32_MIN, INT32_MAX);
tmp = ((((tmp * quant_ptr[rc != 0]) >> 16) + tmp) *
quant_shift_ptr[rc != 0]) >> 15;
quant_shift_ptr[rc != 0]) >> (15 - logsizeby32);
qcoeff_ptr[rc] = (tmp ^ coeff_sign) - coeff_sign;
dqcoeff_ptr[rc] = qcoeff_ptr[rc] * dequant_ptr[rc != 0] / 2;
dqcoeff_ptr[rc] = qcoeff_ptr[rc] * dequant_ptr[rc != 0] /
(2 << logsizeby32);
if (tmp)
eob = idx_arr[i];
@ -519,7 +719,49 @@ void vp9_highbd_quantize_b_32x32_c(const tran_low_t *coeff_ptr,
}
*eob_ptr = eob + 1;
}
#endif
void vp9_highbd_quantize_b_32x32_c(const tran_low_t *coeff_ptr,
intptr_t n_coeffs,
int skip_block,
const int16_t *zbin_ptr,
const int16_t *round_ptr,
const int16_t *quant_ptr,
const int16_t *quant_shift_ptr,
tran_low_t *qcoeff_ptr,
tran_low_t *dqcoeff_ptr,
const int16_t *dequant_ptr,
int zbin_oq_value,
uint16_t *eob_ptr,
const int16_t *scan,
const int16_t *iscan) {
highbd_quantize_b_bigtx(coeff_ptr, n_coeffs, skip_block,
zbin_ptr, round_ptr, quant_ptr, quant_shift_ptr,
qcoeff_ptr, dqcoeff_ptr, dequant_ptr,
zbin_oq_value, eob_ptr, scan, iscan, 0);
}
#if CONFIG_TX64X64
void vp9_highbd_quantize_b_64x64_c(const tran_low_t *coeff_ptr,
intptr_t n_coeffs,
int skip_block,
const int16_t *zbin_ptr,
const int16_t *round_ptr,
const int16_t *quant_ptr,
const int16_t *quant_shift_ptr,
tran_low_t *qcoeff_ptr,
tran_low_t *dqcoeff_ptr,
const int16_t *dequant_ptr,
int zbin_oq_value,
uint16_t *eob_ptr,
const int16_t *scan,
const int16_t *iscan) {
highbd_quantize_b_bigtx(coeff_ptr, n_coeffs, skip_block,
zbin_ptr, round_ptr, quant_ptr, quant_shift_ptr,
qcoeff_ptr, dqcoeff_ptr, dequant_ptr,
zbin_oq_value, eob_ptr, scan, iscan, 1);
}
#endif // CONFIG_TX64X64
#endif // CONFIG_VP9_HIGHBITDEPTH
void vp9_regular_quantize_b_4x4(MACROBLOCK *x, int plane, int block,
const int16_t *scan, const int16_t *iscan) {
@ -530,21 +772,21 @@ void vp9_regular_quantize_b_4x4(MACROBLOCK *x, int plane, int block,
#if CONFIG_VP9_HIGHBITDEPTH
if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
vp9_highbd_quantize_b(BLOCK_OFFSET(p->coeff, block),
16, x->skip_block,
p->zbin, p->round, p->quant, p->quant_shift,
BLOCK_OFFSET(p->qcoeff, block),
BLOCK_OFFSET(pd->dqcoeff, block),
pd->dequant, p->zbin_extra, &p->eobs[block],
scan, iscan);
16, x->skip_block,
p->zbin, p->round, p->quant, p->quant_shift,
BLOCK_OFFSET(p->qcoeff, block),
BLOCK_OFFSET(pd->dqcoeff, block),
pd->dequant, p->zbin_extra, &p->eobs[block],
scan, iscan);
return;
}
#endif
#endif // CONFIG_VP9_HIGHBITDEPTH
vp9_quantize_b(BLOCK_OFFSET(p->coeff, block),
16, x->skip_block,
p->zbin, p->round, p->quant, p->quant_shift,
BLOCK_OFFSET(p->qcoeff, block),
BLOCK_OFFSET(pd->dqcoeff, block),
pd->dequant, p->zbin_extra, &p->eobs[block], scan, iscan);
16, x->skip_block,
p->zbin, p->round, p->quant, p->quant_shift,
BLOCK_OFFSET(p->qcoeff, block),
BLOCK_OFFSET(pd->dqcoeff, block),
pd->dequant, p->zbin_extra, &p->eobs[block], scan, iscan);
}
static void invert_quant(int16_t *quant, int16_t *shift, int d) {

View File

@ -45,6 +45,12 @@ void vp9_quantize_dc_32x32(const tran_low_t *coeff_ptr, int skip_block,
const int16_t *round_ptr, const int16_t quant_ptr,
tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr,
const int16_t dequant_ptr, uint16_t *eob_ptr);
#if CONFIG_TX64X64
void vp9_quantize_dc_64x64(const tran_low_t *coeff_ptr, int skip_block,
const int16_t *round_ptr, const int16_t quant_ptr,
tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr,
const int16_t dequant_ptr, uint16_t *eob_ptr);
#endif // CONFIG_TX64X64
void vp9_regular_quantize_b_4x4(MACROBLOCK *x, int plane, int block,
const int16_t *scan, const int16_t *iscan);
@ -61,7 +67,17 @@ void vp9_highbd_quantize_dc_32x32(const tran_low_t *coeff_ptr,
tran_low_t *dqcoeff_ptr,
const int16_t dequant_ptr,
uint16_t *eob_ptr);
#endif
#if CONFIG_TX64X64
void vp9_highbd_quantize_dc_64x64(const tran_low_t *coeff_ptr,
int skip_block,
const int16_t *round_ptr,
const int16_t quant_ptr,
tran_low_t *qcoeff_ptr,
tran_low_t *dqcoeff_ptr,
const int16_t dequant_ptr,
uint16_t *eob_ptr);
#endif // CONFIG_TX64X64
#endif // CONFIG_VP9_HIGHBITDEPTH
struct VP9_COMP;
struct VP9Common;

View File

@ -88,7 +88,7 @@ static void fill_token_costs(vp9_coeff_cost *c,
vp9_coeff_probs_model (*p)[PLANE_TYPES]) {
int i, j, k, l;
TX_SIZE t;
for (t = TX_4X4; t <= TX_32X32; ++t)
for (t = TX_4X4; t < TX_SIZES; ++t)
for (i = 0; i < PLANE_TYPES; ++i)
for (j = 0; j < REF_TYPES; ++j)
for (k = 0; k < COEF_BANDS; ++k)
@ -437,6 +437,14 @@ void vp9_get_entropy_contexts(BLOCK_SIZE bsize, TX_SIZE tx_size,
for (i = 0; i < num_4x4_h; i += 8)
t_left[i] = !!*(const uint64_t *)&left[i];
break;
#if CONFIG_TX64X64
case TX_64X64:
for (i = 0; i < num_4x4_w; i += 16)
t_above[i] = !!(*(const uint64_t *)&above[i] |
*(const uint64_t *)&above[i + 8]);
for (i = 0; i < num_4x4_h; i += 16)
t_left[i] = !!(*(const uint64_t *)&left[i] |
*(const uint64_t *)&left[i + 8]);
break;
#endif
default:
assert(0 && "Invalid transform size.");
break;
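The new TX_64X64 case reuses the wide-load trick of the smaller sizes: each byte of above[]/left[] records whether a 4x4 column was coded, one 64-bit load tests 8 columns at once, and a 64x64 transform spans 16 columns, hence two loads ORed together. A standalone illustration (memcpy is used here to avoid the aliasing cast):

#include <stdint.h>
#include <stdio.h>
#include <string.h>

int main(void) {
  uint8_t above[16] = {0};   /* one context byte per 4x4 column */
  above[11] = 1;             /* a nonzero context in columns 8..15 */
  uint64_t lo, hi;
  memcpy(&lo, &above[0], 8); /* memcpy sidesteps the aliasing cast */
  memcpy(&hi, &above[8], 8);
  printf("t_above = %d\n", !!(lo | hi));  /* prints 1 */
  return 0;
}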

View File

@ -340,6 +340,9 @@ static const int16_t band_counts[TX_SIZES][8] = {
{ 1, 2, 3, 4, 11, 64 - 21, 0 },
{ 1, 2, 3, 4, 11, 256 - 21, 0 },
{ 1, 2, 3, 4, 11, 1024 - 21, 0 },
#if CONFIG_TX64X64
{ 1, 2, 3, 4, 11, 4096 - 21, 0 },
#endif
};
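The appended row keeps the band layout invariant: the first five bands always cover 1 + 2 + 3 + 4 + 11 = 21 coefficients, and the final band takes whatever remains of the block, 4096 - 21 for 64x64. A quick arithmetic check:

#include <assert.h>
#include <stdio.h>

int main(void) {
  /* The first five bands always hold 1 + 2 + 3 + 4 + 11 = 21
   * coefficients; the final band absorbs the rest of the block. */
  const int leading = 1 + 2 + 3 + 4 + 11;
  assert(leading + (1024 - 21) == 32 * 32);  /* existing TX_32X32 row */
  assert(leading + (4096 - 21) == 64 * 64);  /* new TX_64X64 row */
  printf("band totals match the block sizes\n");
  return 0;
}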
static INLINE int cost_coeffs(MACROBLOCK *x,
int plane, int block,
@ -357,7 +360,7 @@ static INLINE int cost_coeffs(MACROBLOCK *x,
const tran_low_t *const qcoeff = BLOCK_OFFSET(p->qcoeff, block);
unsigned int (*token_costs)[2][COEFF_CONTEXTS][ENTROPY_TOKENS] =
x->token_costs[tx_size][type][is_inter_block(mbmi)];
uint8_t token_cache[32 * 32];
uint8_t token_cache[MAX_NUM_COEFS];
int pt = combine_entropy_contexts(*A, *L);
int c, cost;
// Check for consistency of tx_size with mode info
@ -416,6 +419,8 @@ static INLINE int cost_coeffs(MACROBLOCK *x,
return cost;
}
#define right_shift_signed(x, s) ((s) < 0 ? (x) << (-(s)) : (x) >> (s))
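The right_shift_signed macro generalizes the previous hard-coded >> shift: TX_64X64 uses shift = -2 (its distortion needs scaling up rather than down), and a plain right shift by a negative count would be undefined behavior. A toy demonstration with an arbitrary error value:

#include <stdint.h>
#include <stdio.h>

#define right_shift_signed(x, s) ((s) < 0 ? (x) << (-(s)) : (x) >> (s))

int main(void) {
  const int64_t err = 4096;
  printf("%lld\n", (long long)right_shift_signed(err, 2));  /* 1024: TX_16X16 and below */
  printf("%lld\n", (long long)right_shift_signed(err, 0));  /* 4096: TX_32X32 */
  printf("%lld\n", (long long)right_shift_signed(err, -2)); /* 16384: TX_64X64 */
  return 0;
}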
#if CONFIG_VP9_HIGHBITDEPTH
static void dist_block(int plane, int block, TX_SIZE tx_size,
struct rdcost_block_args* args, int bd) {
@ -429,17 +434,23 @@ static void dist_block(int plane, int block, TX_SIZE tx_size,
const struct macroblock_plane *const p = &x->plane[plane];
const struct macroblockd_plane *const pd = &xd->plane[plane];
int64_t this_sse;
#if CONFIG_TX64X64
int shift = (tx_size == TX_64X64 ? -2 : (tx_size == TX_32X32 ? 0 : 2));
#else
int shift = tx_size == TX_32X32 ? 0 : 2;
#endif
tran_low_t *const coeff = BLOCK_OFFSET(p->coeff, block);
tran_low_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);
#if CONFIG_VP9_HIGHBITDEPTH
args->dist = vp9_highbd_block_error(coeff, dqcoeff, 16 << ss_txfrm_size,
&this_sse, bd) >> shift;
args->dist = right_shift_signed(
vp9_highbd_block_error(
coeff, dqcoeff, 16 << ss_txfrm_size, &this_sse, bd), shift);
#else
args->dist = vp9_block_error(coeff, dqcoeff, 16 << ss_txfrm_size,
&this_sse) >> shift;
args->dist = right_shift_signed(
vp9_block_error(coeff, dqcoeff, 16 << ss_txfrm_size, &this_sse), shift);
#endif // CONFIG_VP9_HIGHBITDEPTH
args->sse = this_sse >> shift;
args->sse = right_shift_signed(this_sse, shift);
if (x->skip_encode && !is_inter_block(&xd->mi[0].src_mi->mbmi)) {
// TODO(jingning): tune the model to better capture the distortion.
@ -514,9 +525,12 @@ static void block_rd_txfm(int plane, int block, BLOCK_SIZE plane_bsize,
#if CONFIG_VP9_HIGHBITDEPTH
dc_correct >>= ((xd->bd - 8) * 2);
#endif
if (tx_size != TX_32X32)
if (tx_size < TX_32X32)
dc_correct >>= 2;
#if CONFIG_TX64X64
else if (tx_size == TX_64X64)
dc_correct <<= 2;
#endif
args->dist = MAX(0, args->sse - dc_correct);
}
} else {
@ -629,10 +643,15 @@ static void choose_tx_size_from_rd(VP9_COMP *cpi, MACROBLOCK *x,
vp9_prob skip_prob = vp9_get_skip_prob(cm, xd);
int r[TX_SIZES][2], s[TX_SIZES];
int64_t d[TX_SIZES], sse[TX_SIZES];
int64_t rd[TX_SIZES][2] = {{INT64_MAX, INT64_MAX},
{INT64_MAX, INT64_MAX},
{INT64_MAX, INT64_MAX},
{INT64_MAX, INT64_MAX}};
int64_t rd[TX_SIZES][2] = {
{INT64_MAX, INT64_MAX},
{INT64_MAX, INT64_MAX},
{INT64_MAX, INT64_MAX},
{INT64_MAX, INT64_MAX},
#if CONFIG_TX64X64
{INT64_MAX, INT64_MAX},
#endif
};
int n, m;
int s0, s1;
const TX_SIZE max_mode_tx_size = tx_mode_to_biggest_tx_size[cm->tx_mode];
@ -681,7 +700,6 @@ static void choose_tx_size_from_rd(VP9_COMP *cpi, MACROBLOCK *x,
mbmi->tx_size = cm->tx_mode == TX_MODE_SELECT ?
best_tx : MIN(max_tx_size, max_mode_tx_size);
*distortion = d[mbmi->tx_size];
*rate = r[mbmi->tx_size][cm->tx_mode == TX_MODE_SELECT];
*skip = s[mbmi->tx_size];
@ -691,8 +709,14 @@ static void choose_tx_size_from_rd(VP9_COMP *cpi, MACROBLOCK *x,
tx_cache[ALLOW_8X8] = rd[TX_8X8][0];
tx_cache[ALLOW_16X16] = rd[MIN(max_tx_size, TX_16X16)][0];
tx_cache[ALLOW_32X32] = rd[MIN(max_tx_size, TX_32X32)][0];
#if CONFIG_TX64X64
tx_cache[ALLOW_64X64] = rd[MIN(max_tx_size, TX_64X64)][0];
#endif
if (max_tx_size == TX_32X32 && best_tx == TX_32X32) {
#if CONFIG_TX64X64
if (max_tx_size >= TX_64X64 && best_tx == TX_64X64) {
tx_cache[TX_MODE_SELECT] = rd[TX_64X64][1];
} else if (max_tx_size >= TX_32X32 && best_tx == TX_32X32) {
tx_cache[TX_MODE_SELECT] = rd[TX_32X32][1];
} else if (max_tx_size >= TX_16X16 && best_tx == TX_16X16) {
tx_cache[TX_MODE_SELECT] = rd[TX_16X16][1];
@ -701,6 +725,17 @@ static void choose_tx_size_from_rd(VP9_COMP *cpi, MACROBLOCK *x,
} else {
tx_cache[TX_MODE_SELECT] = rd[TX_4X4][1];
}
#else
if (max_tx_size >= TX_32X32 && best_tx == TX_32X32) {
tx_cache[TX_MODE_SELECT] = rd[TX_32X32][1];
} else if (max_tx_size >= TX_16X16 && best_tx == TX_16X16) {
tx_cache[TX_MODE_SELECT] = rd[TX_16X16][1];
} else if (rd[TX_8X8][1] < rd[TX_4X4][1]) {
tx_cache[TX_MODE_SELECT] = rd[TX_8X8][1];
} else {
tx_cache[TX_MODE_SELECT] = rd[TX_4X4][1];
}
#endif
}
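The TX_MODE_SELECT ladder gains a 64x64 rung and now checks every size against max_tx_size before trusting best_tx. A reduced standalone version of the selection with invented RD costs (it assumes the experiment is enabled, so TX_64X64 is a real enum value):

#include <stdint.h>
#include <stdio.h>

typedef enum { TX_4X4, TX_8X8, TX_16X16, TX_32X32, TX_64X64, TX_SIZES } TX_SIZE;

/* Reduced ladder: prefer the largest size that both won the per-block
 * search (best_tx) and fits under the frame limit; rd[n][1] is the cost
 * of size n when the transform size is signalled per block. */
static int64_t select_cost(const int64_t rd[TX_SIZES][2],
                           TX_SIZE max_tx_size, TX_SIZE best_tx) {
  if (max_tx_size >= TX_64X64 && best_tx == TX_64X64) return rd[TX_64X64][1];
  if (max_tx_size >= TX_32X32 && best_tx == TX_32X32) return rd[TX_32X32][1];
  if (max_tx_size >= TX_16X16 && best_tx == TX_16X16) return rd[TX_16X16][1];
  return rd[TX_8X8][1] < rd[TX_4X4][1] ? rd[TX_8X8][1] : rd[TX_4X4][1];
}

int main(void) {
  const int64_t rd[TX_SIZES][2] = {
    {90, 100}, {80, 85}, {70, 75}, {60, 65}, {50, 55}  /* invented costs */
  };
  printf("%lld\n", (long long)select_cost(rd, TX_64X64, TX_64X64)); /* 55 */
  printf("%lld\n", (long long)select_cost(rd, TX_32X32, TX_32X32)); /* 65 */
  return 0;
}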
static void super_block_yrd(VP9_COMP *cpi, MACROBLOCK *x, int *rate,
@ -1972,12 +2007,13 @@ static void estimate_ref_frame_costs(const VP9_COMMON *cm,
}
}
static void store_coding_context(MACROBLOCK *x, PICK_MODE_CONTEXT *ctx,
int mode_index,
int64_t comp_pred_diff[REFERENCE_MODES],
const int64_t tx_size_diff[TX_MODES],
int64_t best_filter_diff[SWITCHABLE_FILTER_CONTEXTS],
int skippable) {
static void store_coding_context(
MACROBLOCK *x, PICK_MODE_CONTEXT *ctx,
int mode_index,
int64_t comp_pred_diff[REFERENCE_MODES],
const int64_t tx_size_diff[TX_MODES],
int64_t best_filter_diff[SWITCHABLE_FILTER_CONTEXTS],
int skippable) {
MACROBLOCKD *const xd = &x->e_mbd;
// Take a snapshot of the coding context so it can be

View File

@ -48,6 +48,10 @@ static void set_good_speed_feature(VP9_COMP *cpi, VP9_COMMON *cm,
sf->adaptive_pred_interp_filter = 1;
sf->recode_loop = ALLOW_RECODE_KFARFGF;
#if CONFIG_TX64X64
sf->intra_y_mode_mask[TX_64X64] = INTRA_DC_H_V;
sf->intra_uv_mode_mask[TX_64X64] = INTRA_DC_H_V;
#endif
sf->intra_y_mode_mask[TX_32X32] = INTRA_DC_H_V;
sf->intra_uv_mode_mask[TX_32X32] = INTRA_DC_H_V;
sf->intra_y_mode_mask[TX_16X16] = INTRA_DC_H_V;
@ -114,6 +118,10 @@ static void set_good_speed_feature(VP9_COMP *cpi, VP9_COMMON *cm,
sf->recode_loop = ALLOW_RECODE_KFMAXBW;
sf->adaptive_rd_thresh = 3;
sf->mode_skip_start = 6;
#if CONFIG_TX64X64
sf->intra_y_mode_mask[TX_64X64] = INTRA_DC;
sf->intra_uv_mode_mask[TX_64X64] = INTRA_DC;
#endif
sf->intra_y_mode_mask[TX_32X32] = INTRA_DC;
sf->intra_uv_mode_mask[TX_32X32] = INTRA_DC;
sf->adaptive_interp_filter_search = 1;
@ -181,6 +189,10 @@ static void set_rt_speed_feature(VP9_COMP *cpi, SPEED_FEATURES *sf,
sf->adaptive_pred_interp_filter = 1;
sf->mv.auto_mv_step_size = 1;
sf->adaptive_rd_thresh = 2;
#if CONFIG_TX64X64
sf->intra_y_mode_mask[TX_64X64] = INTRA_DC_H_V;
sf->intra_uv_mode_mask[TX_64X64] = INTRA_DC_H_V;
#endif
sf->intra_y_mode_mask[TX_32X32] = INTRA_DC_H_V;
sf->intra_uv_mode_mask[TX_32X32] = INTRA_DC_H_V;
sf->intra_uv_mode_mask[TX_16X16] = INTRA_DC_H_V;
@ -240,6 +252,9 @@ static void set_rt_speed_feature(VP9_COMP *cpi, SPEED_FEATURES *sf,
sf->intra_uv_mode_mask[i] = INTRA_DC;
}
sf->intra_y_mode_mask[TX_32X32] = INTRA_DC;
#if CONFIG_TX64X64
sf->intra_y_mode_mask[TX_64X64] = INTRA_DC;
#endif
sf->frame_parameter_update = 0;
sf->mv.search_method = FAST_HEX;
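Across all three speed paths the change is the same: whenever the TX_32X32 intra mode mask is restricted, the TX_64X64 mask is restricted identically. A sketch of how such a mask gates the mode search; the bit assignments below are invented, not the codec's actual INTRA_* definitions:

#include <stdio.h>

/* Hypothetical bit assignments for a per-tx-size intra mode mask. */
enum { DC_PRED = 1 << 0, V_PRED = 1 << 1, H_PRED = 1 << 2, TM_PRED = 1 << 3 };
#define INTRA_DC     (DC_PRED)
#define INTRA_DC_H_V (DC_PRED | V_PRED | H_PRED)

int main(void) {
  const int mask_64 = INTRA_DC_H_V;  /* speed 1: DC/H/V only at TX_64X64 */
  const int mask_hi = INTRA_DC;      /* higher speeds: DC only */
  printf("TM at 64x64?    %s\n", (mask_64 & TM_PRED) ? "yes" : "no"); /* no */
  printf("V at 64x64?     %s\n", (mask_64 & V_PRED) ? "yes" : "no");  /* yes */
  printf("V when DC-only? %s\n", (mask_hi & V_PRED) ? "yes" : "no");  /* no */
  return 0;
}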

View File

@ -296,7 +296,7 @@ static void tokenize_b(int plane, int block, BLOCK_SIZE plane_bsize,
VP9_COMP *cpi = args->cpi;
MACROBLOCKD *xd = args->xd;
TOKENEXTRA **tp = args->tp;
uint8_t token_cache[32 * 32];
uint8_t token_cache[MAX_NUM_COEFS];
struct macroblock_plane *p = &cpi->mb.plane[plane];
struct macroblockd_plane *pd = &xd->plane[plane];
MB_MODE_INFO *mbmi = &xd->mi[0].src_mi->mbmi;
@ -374,7 +374,6 @@ static void tokenize_b(int plane, int block, BLOCK_SIZE plane_bsize,
counts[band[c]][pt]);
++eob_branch[band[c]][pt];
}
*tp = t;
vp9_set_contexts(xd, pd, plane_bsize, tx_size, c > 0, aoff, loff);
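The 1024-element buffers (32 * 32 coefficients) that this file and the RD code shared now size themselves from MAX_NUM_COEFS. Its definition lives elsewhere in the patch; the gated form below is an assumption consistent with the replacements made here:

#include <stdio.h>

#define CONFIG_TX64X64 1  /* as if the experiment were enabled */

/* Assumed definition, matching the 1024 -> MAX_NUM_COEFS swaps above. */
#if CONFIG_TX64X64
#define MAX_NUM_COEFS (64 * 64)  /* 4096 coefficients */
#else
#define MAX_NUM_COEFS (32 * 32)  /* 1024 coefficients */
#endif

int main(void) {
  unsigned char token_cache[MAX_NUM_COEFS];
  printf("token_cache bytes: %zu\n", sizeof token_cache);
  return 0;
}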