diff --git a/vp9/common/vp9_blockd.c b/vp9/common/vp9_blockd.c index 1eda3cc38..9151622d3 100644 --- a/vp9/common/vp9_blockd.c +++ b/vp9/common/vp9_blockd.c @@ -12,15 +12,431 @@ #include "vp9/common/vp9_blockd.h" #include "vpx_mem/vpx_mem.h" -const uint8_t vp9_block2left[TX_SIZE_MAX_SB][24] = { - {0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7}, - {0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 2, 2, 2, 2, 2, 2, 4, 4, 4, 4, 6, 6, 6, 6}, - {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 4, 4, 4, 6, 6, 6, 6}, - {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 4, 4, 4, 6, 6, 6, 6} +const uint8_t vp9_block2left[TX_SIZE_MAX_MB][24] = { + { 0, 0, 0, 0, + 1, 1, 1, 1, + 2, 2, 2, 2, + 3, 3, 3, 3, + 4, 4, + 5, 5, + 6, 6, + 7, 7 }, + { 0, 0, 0, 0, + 0, 0, 0, 0, + 2, 2, 2, 2, + 2, 2, 2, 2, + 4, 4, + 4, 4, + 6, 6, + 6, 6 }, + { 0, 0, 0, 0, + 0, 0, 0, 0, + 0, 0, 0, 0, + 0, 0, 0, 0 }, }; -const uint8_t vp9_block2above[TX_SIZE_MAX_SB][24] = { - {0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 4, 5, 4, 5, 6, 7, 6, 7}, - {0, 0, 0, 0, 2, 2, 2, 2, 0, 0, 0, 0, 2, 2, 2, 2, 4, 4, 4, 4, 6, 6, 6, 6}, - {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 4, 4, 4, 6, 6, 6, 6}, - {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 4, 4, 4, 6, 6, 6, 6} +const uint8_t vp9_block2above[TX_SIZE_MAX_MB][24] = { + { 0, 1, 2, 3, + 0, 1, 2, 3, + 0, 1, 2, 3, + 0, 1, 2, 3, + 4, 5, + 4, 5, + 6, 7, + 6, 7 }, + { 0, 0, 0, 0, + 2, 2, 2, 2, + 0, 0, 0, 0, + 2, 2, 2, 2, + 4, 4, + 4, 4, + 6, 6, + 6, 6 }, + { 0, 0, 0, 0, + 0, 0, 0, 0, + 0, 0, 0, 0, + 0, 0, 0, 0 }, }; + +#define S(x) x + sizeof(ENTROPY_CONTEXT_PLANES) / sizeof(ENTROPY_CONTEXT) +const uint8_t vp9_block2left_sb[TX_SIZE_MAX_SB][96] = { + { 0, 0, 0, 0, 0, 0, 0, 0, + 1, 1, 1, 1, 1, 1, 1, 1, + 2, 2, 2, 2, 2, 2, 2, 2, + 3, 3, 3, 3, 3, 3, 3, 3, + S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), + S(1), S(1), S(1), S(1), S(1), S(1), S(1), S(1), + S(2), S(2), S(2), S(2), S(2), S(2), S(2), S(2), + S(3), S(3), S(3), S(3), S(3), S(3), S(3), S(3), 
+ 4, 4, 4, 4, + 5, 5, 5, 5, + S(4), S(4), S(4), S(4), + S(5), S(5), S(5), S(5), + 6, 6, 6, 6, + 7, 7, 7, 7, + S(6), S(6), S(6), S(6), + S(7), S(7), S(7), S(7) }, + { 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, + S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), + S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), + S(2), S(2), S(2), S(2), S(2), S(2), S(2), S(2), + S(2), S(2), S(2), S(2), S(2), S(2), S(2), S(2), + 4, 4, 4, 4, + 4, 4, 4, 4, + S(4), S(4), S(4), S(4), + S(4), S(4), S(4), S(4), + 6, 6, 6, 6, + 6, 6, 6, 6, + S(6), S(6), S(6), S(6), + S(6), S(6), S(6), S(6) }, + { 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), + S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), + S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), + S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), + 4, 4, 4, 4, + 4, 4, 4, 4, + 4, 4, 4, 4, + 4, 4, 4, 4, + 6, 6, 6, 6, + 6, 6, 6, 6, + 6, 6, 6, 6, + 6, 6, 6, 6 }, + { 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0 }, +}; +const uint8_t vp9_block2above_sb[TX_SIZE_MAX_SB][96] = { + { 0, 1, 2, 3, S(0), S(1), S(2), S(3), + 0, 1, 2, 3, S(0), S(1), S(2), S(3), + 0, 1, 2, 3, S(0), S(1), S(2), S(3), + 0, 1, 2, 3, S(0), S(1), S(2), S(3), + 0, 1, 2, 3, S(0), S(1), S(2), S(3), + 0, 1, 2, 3, S(0), S(1), S(2), S(3), + 0, 1, 2, 3, S(0), S(1), S(2), S(3), + 0, 1, 2, 3, S(0), S(1), S(2), S(3), + 4, 5, S(4), S(5), + 4, 5, S(4), S(5), + 4, 5, S(4), S(5), + 4, 5, S(4), S(5), + 6, 7, S(6), S(7), + 6, 7, S(6), S(7), + 6, 7, S(6), S(7), + 6, 7, S(6), S(7) }, + { 0, 0, 0, 0, 2, 2, 2, 2, + S(0), S(0), S(0), S(0), S(2), S(2), S(2), S(2), + 0, 0, 0, 0, 2, 2, 2, 2, + S(0), S(0), S(0), S(0), S(2), S(2), S(2), S(2), + 0, 0, 0, 0, 2, 2, 2, 2, + S(0), S(0), S(0), S(0), 
S(2), S(2), S(2), S(2), + 0, 0, 0, 0, 2, 2, 2, 2, + S(0), S(0), S(0), S(0), S(2), S(2), S(2), S(2), + 4, 4, 4, 4, + S(4), S(4), S(4), S(4), + 4, 4, 4, 4, + S(4), S(4), S(4), S(4), + 6, 6, 6, 6, + S(6), S(6), S(6), S(6), + 6, 6, 6, 6, + S(6), S(6), S(6), S(6) }, + { 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), + S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), + S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), + 4, 4, 4, 4, + 4, 4, 4, 4, + 4, 4, 4, 4, + 4, 4, 4, 4, + 6, 6, 6, 6, + 6, 6, 6, 6, + 6, 6, 6, 6, + 6, 6, 6, 6 }, + { 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0 }, +}; + +#define T(x) x + 2 * (sizeof(ENTROPY_CONTEXT_PLANES) / sizeof(ENTROPY_CONTEXT)) +#define U(x) x + 3 * (sizeof(ENTROPY_CONTEXT_PLANES) / sizeof(ENTROPY_CONTEXT)) +const uint8_t vp9_block2left_sb64[TX_SIZE_MAX_SB][384] = { + { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, + S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), + S(1), S(1), S(1), S(1), S(1), S(1), S(1), S(1), S(1), S(1), S(1), S(1), S(1), S(1), S(1), S(1), + S(2), S(2), S(2), S(2), S(2), S(2), S(2), S(2), S(2), S(2), S(2), S(2), S(2), S(2), S(2), S(2), + S(3), S(3), S(3), S(3), S(3), S(3), S(3), S(3), S(3), S(3), S(3), S(3), S(3), S(3), S(3), S(3), + T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), + T(1), T(1), T(1), T(1), T(1), T(1), T(1), T(1), T(1), T(1), T(1), T(1), T(1), T(1), T(1), T(1), + T(2), T(2), T(2), T(2), T(2), T(2), T(2), T(2), T(2), T(2), T(2), T(2), T(2), T(2), 
T(2), T(2), + T(3), T(3), T(3), T(3), T(3), T(3), T(3), T(3), T(3), T(3), T(3), T(3), T(3), T(3), T(3), T(3), + U(0), U(0), U(0), U(0), U(0), U(0), U(0), U(0), U(0), U(0), U(0), U(0), U(0), U(0), U(0), U(0), + U(1), U(1), U(1), U(1), U(1), U(1), U(1), U(1), U(1), U(1), U(1), U(1), U(1), U(1), U(1), U(1), + U(2), U(2), U(2), U(2), U(2), U(2), U(2), U(2), U(2), U(2), U(2), U(2), U(2), U(2), U(2), U(2), + U(3), U(3), U(3), U(3), U(3), U(3), U(3), U(3), U(3), U(3), U(3), U(3), U(3), U(3), U(3), U(3), + 4, 4, 4, 4, 4, 4, 4, 4, + 5, 5, 5, 5, 5, 5, 5, 5, + S(4), S(4), S(4), S(4), S(4), S(4), S(4), S(4), + S(5), S(5), S(5), S(5), S(5), S(5), S(5), S(5), + T(4), T(4), T(4), T(4), T(4), T(4), T(4), T(4), + T(5), T(5), T(5), T(5), T(5), T(5), T(5), T(5), + U(4), U(4), U(4), U(4), U(4), U(4), U(4), U(4), + U(5), U(5), U(5), U(5), U(5), U(5), U(5), U(5), + 6, 6, 6, 6, 6, 6, 6, 6, + 7, 7, 7, 7, 7, 7, 7, 7, + S(6), S(6), S(6), S(6), S(6), S(6), S(6), S(6), + S(7), S(7), S(7), S(7), S(7), S(7), S(7), S(7), + T(6), T(6), T(6), T(6), T(6), T(6), T(6), T(6), + T(7), T(7), T(7), T(7), T(7), T(7), T(7), T(7), + U(6), U(6), U(6), U(6), U(6), U(6), U(6), U(6), + U(7), U(7), U(7), U(7), U(7), U(7), U(7), U(7) }, + { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), + S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), + S(2), S(2), S(2), S(2), S(2), S(2), S(2), S(2), S(2), S(2), S(2), S(2), S(2), S(2), S(2), S(2), + S(2), S(2), S(2), S(2), S(2), S(2), S(2), S(2), S(2), S(2), S(2), S(2), S(2), S(2), S(2), S(2), + T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), + T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), + 
T(2), T(2), T(2), T(2), T(2), T(2), T(2), T(2), T(2), T(2), T(2), T(2), T(2), T(2), T(2), T(2), + T(2), T(2), T(2), T(2), T(2), T(2), T(2), T(2), T(2), T(2), T(2), T(2), T(2), T(2), T(2), T(2), + U(0), U(0), U(0), U(0), U(0), U(0), U(0), U(0), U(0), U(0), U(0), U(0), U(0), U(0), U(0), U(0), + U(0), U(0), U(0), U(0), U(0), U(0), U(0), U(0), U(0), U(0), U(0), U(0), U(0), U(0), U(0), U(0), + U(2), U(2), U(2), U(2), U(2), U(2), U(2), U(2), U(2), U(2), U(2), U(2), U(2), U(2), U(2), U(2), + U(2), U(2), U(2), U(2), U(2), U(2), U(2), U(2), U(2), U(2), U(2), U(2), U(2), U(2), U(2), U(2), + 4, 4, 4, 4, 4, 4, 4, 4, + 4, 4, 4, 4, 4, 4, 4, 4, + S(4), S(4), S(4), S(4), S(4), S(4), S(4), S(4), + S(4), S(4), S(4), S(4), S(4), S(4), S(4), S(4), + T(4), T(4), T(4), T(4), T(4), T(4), T(4), T(4), + T(4), T(4), T(4), T(4), T(4), T(4), T(4), T(4), + U(4), U(4), U(4), U(4), U(4), U(4), U(4), U(4), + U(4), U(4), U(4), U(4), U(4), U(4), U(4), U(4), + 6, 6, 6, 6, 6, 6, 6, 6, + 6, 6, 6, 6, 6, 6, 6, 6, + S(6), S(6), S(6), S(6), S(6), S(6), S(6), S(6), + S(6), S(6), S(6), S(6), S(6), S(6), S(6), S(6), + T(6), T(6), T(6), T(6), T(6), T(6), T(6), T(6), + T(6), T(6), T(6), T(6), T(6), T(6), T(6), T(6), + U(6), U(6), U(6), U(6), U(6), U(6), U(6), U(6), + U(6), U(6), U(6), U(6), U(6), U(6), U(6), U(6) }, + { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), + S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), + S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), + S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), + T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), + T(0), T(0), 
T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), + T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), + T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), + U(0), U(0), U(0), U(0), U(0), U(0), U(0), U(0), U(0), U(0), U(0), U(0), U(0), U(0), U(0), U(0), + U(0), U(0), U(0), U(0), U(0), U(0), U(0), U(0), U(0), U(0), U(0), U(0), U(0), U(0), U(0), U(0), + U(0), U(0), U(0), U(0), U(0), U(0), U(0), U(0), U(0), U(0), U(0), U(0), U(0), U(0), U(0), U(0), + U(0), U(0), U(0), U(0), U(0), U(0), U(0), U(0), U(0), U(0), U(0), U(0), U(0), U(0), U(0), U(0), + 4, 4, 4, 4, 4, 4, 4, 4, + 4, 4, 4, 4, 4, 4, 4, 4, + 4, 4, 4, 4, 4, 4, 4, 4, + 4, 4, 4, 4, 4, 4, 4, 4, + T(4), T(4), T(4), T(4), T(4), T(4), T(4), T(4), + T(4), T(4), T(4), T(4), T(4), T(4), T(4), T(4), + T(4), T(4), T(4), T(4), T(4), T(4), T(4), T(4), + T(4), T(4), T(4), T(4), T(4), T(4), T(4), T(4), + 6, 6, 6, 6, 6, 6, 6, 6, + 6, 6, 6, 6, 6, 6, 6, 6, + 6, 6, 6, 6, 6, 6, 6, 6, + 6, 6, 6, 6, 6, 6, 6, 6, + T(6), T(6), T(6), T(6), T(6), T(6), T(6), T(6), + T(6), T(6), T(6), T(6), T(6), T(6), T(6), T(6), + T(6), T(6), T(6), T(6), T(6), T(6), T(6), T(6), + T(6), T(6), T(6), T(6), T(6), T(6), T(6), T(6) }, + { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 4, 4, 4, 4, 4, 4, 4, 4, + 4, 4, 4, 4, 4, 4, 4, 4, + 4, 4, 4, 4, 4, 4, 4, 4, + 4, 4, 4, 4, 4, 4, 4, 4, + 4, 4, 4, 4, 4, 4, 4, 4, + 4, 4, 4, 4, 4, 4, 4, 4, + 4, 4, 4, 4, 4, 4, 4, 4, + 4, 4, 4, 4, 4, 4, 4, 4, + 6, 6, 6, 6, 6, 6, 6, 6, + 6, 6, 6, 6, 6, 6, 6, 6, + 6, 6, 6, 6, 6, 6, 6, 6, + 6, 6, 6, 6, 6, 6, 6, 6, + 6, 6, 6, 6, 6, 6, 6, 6, + 6, 6, 6, 6, 6, 6, 6, 6, + 6, 6, 6, 6, 6, 6, 6, 6, + 6, 6, 6, 6, 6, 6, 6, 6 }, +}; +const uint8_t vp9_block2above_sb64[TX_SIZE_MAX_SB][384] = { + { 0, 1, 2, 3, S(0), S(1), S(2), S(3), T(0), T(1), T(2), T(3), U(0), U(1), U(2), U(3), + 0, 1, 2, 3, S(0), S(1), S(2), S(3), T(0), T(1), T(2), T(3), U(0), U(1), U(2), U(3), + 0, 1, 2, 3, S(0), S(1), S(2), S(3), T(0), T(1), T(2), T(3), U(0), U(1), U(2), U(3), + 0, 1, 2, 3, S(0), S(1), S(2), S(3), T(0), T(1), T(2), T(3), U(0), U(1), U(2), U(3), + 0, 1, 2, 3, S(0), S(1), S(2), S(3), T(0), T(1), T(2), T(3), U(0), U(1), U(2), U(3), + 0, 1, 2, 3, S(0), S(1), S(2), S(3), T(0), T(1), T(2), T(3), U(0), U(1), U(2), U(3), + 0, 1, 2, 3, S(0), S(1), S(2), S(3), T(0), T(1), T(2), T(3), U(0), U(1), U(2), U(3), + 0, 1, 2, 3, S(0), S(1), S(2), S(3), T(0), T(1), T(2), T(3), U(0), U(1), U(2), U(3), + 0, 1, 2, 3, S(0), S(1), S(2), S(3), T(0), T(1), T(2), T(3), U(0), U(1), U(2), U(3), + 0, 1, 2, 3, S(0), S(1), S(2), S(3), T(0), T(1), T(2), T(3), U(0), U(1), U(2), U(3), + 0, 1, 2, 3, S(0), S(1), S(2), S(3), T(0), T(1), T(2), T(3), U(0), U(1), U(2), U(3), + 0, 1, 2, 3, S(0), S(1), S(2), S(3), T(0), T(1), T(2), T(3), U(0), U(1), U(2), U(3), + 0, 1, 2, 3, S(0), S(1), S(2), S(3), T(0), T(1), T(2), T(3), U(0), U(1), U(2), U(3), + 0, 1, 2, 3, S(0), S(1), S(2), S(3), T(0), T(1), T(2), T(3), U(0), U(1), U(2), U(3), + 0, 1, 2, 3, S(0), S(1), S(2), S(3), T(0), T(1), T(2), T(3), U(0), U(1), U(2), U(3), + 0, 1, 2, 3, S(0), S(1), S(2), S(3), T(0), T(1), T(2), T(3), U(0), U(1), U(2), U(3), + 4, 5, S(4), S(5), T(4), T(5), U(4), U(5), + 4, 5, 
S(4), S(5), T(4), T(5), U(4), U(5), + 4, 5, S(4), S(5), T(4), T(5), U(4), U(5), + 4, 5, S(4), S(5), T(4), T(5), U(4), U(5), + 4, 5, S(4), S(5), T(4), T(5), U(4), U(5), + 4, 5, S(4), S(5), T(4), T(5), U(4), U(5), + 4, 5, S(4), S(5), T(4), T(5), U(4), U(5), + 4, 5, S(4), S(5), T(4), T(5), U(4), U(5), + 6, 7, S(6), S(7), T(6), T(7), U(6), U(7), + 6, 7, S(6), S(7), T(6), T(7), U(6), U(7), + 6, 7, S(6), S(7), T(6), T(7), U(6), U(7), + 6, 7, S(6), S(7), T(6), T(7), U(6), U(7), + 6, 7, S(6), S(7), T(6), T(7), U(6), U(7), + 6, 7, S(6), S(7), T(6), T(7), U(6), U(7), + 6, 7, S(6), S(7), T(6), T(7), U(6), U(7), + 6, 7, S(6), S(7), T(6), T(7), U(6), U(7) }, + { 0, 0, 0, 0, 2, 2, 2, 2, S(0), S(0), S(0), S(0), S(2), S(2), S(2), S(2), + T(0), T(0), T(0), T(0), T(2), T(2), T(2), T(2), U(0), U(0), U(0), U(0), U(2), U(2), U(2), U(2), + 0, 0, 0, 0, 2, 2, 2, 2, S(0), S(0), S(0), S(0), S(2), S(2), S(2), S(2), + T(0), T(0), T(0), T(0), T(2), T(2), T(2), T(2), U(0), U(0), U(0), U(0), U(2), U(2), U(2), U(2), + 0, 0, 0, 0, 2, 2, 2, 2, S(0), S(0), S(0), S(0), S(2), S(2), S(2), S(2), + T(0), T(0), T(0), T(0), T(2), T(2), T(2), T(2), U(0), U(0), U(0), U(0), U(2), U(2), U(2), U(2), + 0, 0, 0, 0, 2, 2, 2, 2, S(0), S(0), S(0), S(0), S(2), S(2), S(2), S(2), + T(0), T(0), T(0), T(0), T(2), T(2), T(2), T(2), U(0), U(0), U(0), U(0), U(2), U(2), U(2), U(2), + 0, 0, 0, 0, 2, 2, 2, 2, S(0), S(0), S(0), S(0), S(2), S(2), S(2), S(2), + T(0), T(0), T(0), T(0), T(2), T(2), T(2), T(2), U(0), U(0), U(0), U(0), U(2), U(2), U(2), U(2), + 0, 0, 0, 0, 2, 2, 2, 2, S(0), S(0), S(0), S(0), S(2), S(2), S(2), S(2), + T(0), T(0), T(0), T(0), T(2), T(2), T(2), T(2), U(0), U(0), U(0), U(0), U(2), U(2), U(2), U(2), + 0, 0, 0, 0, 2, 2, 2, 2, S(0), S(0), S(0), S(0), S(2), S(2), S(2), S(2), + T(0), T(0), T(0), T(0), T(2), T(2), T(2), T(2), U(0), U(0), U(0), U(0), U(2), U(2), U(2), U(2), + 0, 0, 0, 0, 2, 2, 2, 2, S(0), S(0), S(0), S(0), S(2), S(2), S(2), S(2), + T(0), T(0), T(0), T(0), T(2), T(2), T(2), T(2), U(0), U(0), 
U(0), U(0), U(2), U(2), U(2), U(2), + 4, 4, 4, 4, S(4), S(4), S(4), S(4), + T(4), T(4), T(4), T(4), U(4), U(4), U(4), U(4), + 4, 4, 4, 4, S(4), S(4), S(4), S(4), + T(4), T(4), T(4), T(4), U(4), U(4), U(4), U(4), + 4, 4, 4, 4, S(4), S(4), S(4), S(4), + T(4), T(4), T(4), T(4), U(4), U(4), U(4), U(4), + 4, 4, 4, 4, S(4), S(4), S(4), S(4), + T(4), T(4), T(4), T(4), U(4), U(4), U(4), U(4), + 6, 6, 6, 6, S(6), S(6), S(6), S(6), + T(6), T(6), T(6), T(6), U(6), U(6), U(6), U(6), + 6, 6, 6, 6, S(6), S(6), S(6), S(6), + T(6), T(6), T(6), T(6), U(6), U(6), U(6), U(6), + 6, 6, 6, 6, S(6), S(6), S(6), S(6), + T(6), T(6), T(6), T(6), U(6), U(6), U(6), U(6), + 6, 6, 6, 6, S(6), S(6), S(6), S(6), + T(6), T(6), T(6), T(6), U(6), U(6), U(6), U(6) }, + { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), + T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), + U(0), U(0), U(0), U(0), U(0), U(0), U(0), U(0), U(0), U(0), U(0), U(0), U(0), U(0), U(0), U(0), + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), + T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), + U(0), U(0), U(0), U(0), U(0), U(0), U(0), U(0), U(0), U(0), U(0), U(0), U(0), U(0), U(0), U(0), + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), + T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), + U(0), U(0), U(0), U(0), U(0), U(0), U(0), U(0), U(0), U(0), U(0), U(0), U(0), U(0), U(0), U(0), + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), + T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), 
T(0), T(0), T(0), T(0), + U(0), U(0), U(0), U(0), U(0), U(0), U(0), U(0), U(0), U(0), U(0), U(0), U(0), U(0), U(0), U(0), + 4, 4, 4, 4, 4, 4, 4, 4, + 4, 4, 4, 4, 4, 4, 4, 4, + T(4), T(4), T(4), T(4), T(4), T(4), T(4), T(4), + T(4), T(4), T(4), T(4), T(4), T(4), T(4), T(4), + 4, 4, 4, 4, 4, 4, 4, 4, + 4, 4, 4, 4, 4, 4, 4, 4, + T(4), T(4), T(4), T(4), T(4), T(4), T(4), T(4), + T(4), T(4), T(4), T(4), T(4), T(4), T(4), T(4), + 6, 6, 6, 6, 6, 6, 6, 6, + 6, 6, 6, 6, 6, 6, 6, 6, + T(6), T(6), T(6), T(6), T(6), T(6), T(6), T(6), + T(6), T(6), T(6), T(6), T(6), T(6), T(6), T(6), + 6, 6, 6, 6, 6, 6, 6, 6, + 6, 6, 6, 6, 6, 6, 6, 6, + T(6), T(6), T(6), T(6), T(6), T(6), T(6), T(6), + T(6), T(6), T(6), T(6), T(6), T(6), T(6), T(6) }, + { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 4, 4, 4, 4, 4, 4, 4, 4, + 4, 4, 4, 4, 4, 4, 4, 4, + 4, 4, 4, 4, 4, 4, 4, 4, + 4, 4, 4, 4, 4, 4, 4, 4, + 4, 4, 4, 4, 4, 4, 4, 4, + 4, 4, 4, 4, 4, 4, 4, 4, + 4, 4, 4, 4, 4, 4, 4, 4, + 4, 4, 4, 4, 4, 4, 4, 4, + 6, 6, 6, 6, 6, 6, 6, 6, + 6, 6, 6, 6, 6, 6, 6, 6, + 6, 6, 6, 6, 6, 6, 6, 6, + 6, 6, 6, 6, 6, 6, 6, 6, + 6, 6, 6, 6, 6, 6, 6, 6, + 6, 6, 6, 6, 6, 6, 6, 6, + 6, 6, 6, 6, 6, 6, 6, 6, + 6, 6, 6, 6, 6, 6, 6, 6 }, +}; +#undef U +#undef T +#undef S diff --git 
a/vp9/common/vp9_blockd.h b/vp9/common/vp9_blockd.h index b35c1c246..b46dd0568 100644 --- a/vp9/common/vp9_blockd.h +++ b/vp9/common/vp9_blockd.h @@ -277,13 +277,6 @@ typedef struct blockd { union b_mode_info bmi; } BLOCKD; -typedef struct superblockd { - /* 32x32 Y and 16x16 U/V */ - DECLARE_ALIGNED(16, int16_t, diff[32*32+16*16*2]); - DECLARE_ALIGNED(16, int16_t, qcoeff[32*32+16*16*2]); - DECLARE_ALIGNED(16, int16_t, dqcoeff[32*32+16*16*2]); -} SUPERBLOCKD; - struct scale_factors { int x_num; int x_den; @@ -297,13 +290,11 @@ struct scale_factors { }; typedef struct macroblockd { - DECLARE_ALIGNED(16, int16_t, diff[384]); /* from idct diff */ - DECLARE_ALIGNED(16, uint8_t, predictor[384]); - DECLARE_ALIGNED(16, int16_t, qcoeff[384]); - DECLARE_ALIGNED(16, int16_t, dqcoeff[384]); - DECLARE_ALIGNED(16, uint16_t, eobs[24]); - - SUPERBLOCKD sb_coeff_data; + DECLARE_ALIGNED(16, int16_t, diff[64*64+32*32*2]); /* from idct diff */ + DECLARE_ALIGNED(16, uint8_t, predictor[384]); // unused for superblocks + DECLARE_ALIGNED(16, int16_t, qcoeff[64*64+32*32*2]); + DECLARE_ALIGNED(16, int16_t, dqcoeff[64*64+32*32*2]); + DECLARE_ALIGNED(16, uint16_t, eobs[256+64*2]); /* 16 Y blocks, 4 U, 4 V, each with 16 entries. 
*/ BLOCKD block[24]; @@ -451,8 +442,12 @@ static TX_TYPE txfm_map(B_PREDICTION_MODE bmode) { } } -extern const uint8_t vp9_block2left[TX_SIZE_MAX_SB][24]; -extern const uint8_t vp9_block2above[TX_SIZE_MAX_SB][24]; +extern const uint8_t vp9_block2left[TX_SIZE_MAX_MB][24]; +extern const uint8_t vp9_block2above[TX_SIZE_MAX_MB][24]; +extern const uint8_t vp9_block2left_sb[TX_SIZE_MAX_SB][96]; +extern const uint8_t vp9_block2above_sb[TX_SIZE_MAX_SB][96]; +extern const uint8_t vp9_block2left_sb64[TX_SIZE_MAX_SB][384]; +extern const uint8_t vp9_block2above_sb64[TX_SIZE_MAX_SB][384]; #define USE_ADST_FOR_I16X16_8X8 0 #define USE_ADST_FOR_I16X16_4X4 0 diff --git a/vp9/common/vp9_default_coef_probs.h b/vp9/common/vp9_default_coef_probs.h index 6309566a7..204e65af6 100644 --- a/vp9/common/vp9_default_coef_probs.h +++ b/vp9/common/vp9_default_coef_probs.h @@ -270,85 +270,85 @@ static const vp9_coeff_probs default_coef_probs_8x8[BLOCK_TYPES] = { }, { /* block Type 1 */ { /* Intra */ { /* Coeff Band 0 */ - { 202, 29, 181, 221, 168, 177, 217, 162, 235, 202, 157 }, - { 117, 39, 146, 207, 155, 172, 203, 155, 236, 192, 208 }, - { 46, 40, 99, 171, 136, 161, 176, 140, 229, 177, 208 } + { 210, 33, 210, 232, 185, 185, 210, 166, 207, 192, 146 }, + { 118, 47, 169, 220, 170, 179, 201, 160, 231, 183, 211 }, + { 40, 52, 119, 203, 146, 169, 207, 160, 242, 194, 222 } }, { /* Coeff Band 1 */ - { 1, 138, 204, 227, 179, 181, 224, 161, 249, 203, 237 }, - { 116, 138, 209, 227, 179, 180, 222, 165, 248, 204, 241 }, - { 63, 112, 184, 227, 183, 178, 223, 167, 248, 206, 237 }, - { 47, 84, 140, 219, 163, 177, 223, 160, 249, 207, 241 }, - { 25, 53, 76, 179, 120, 156, 217, 152, 248, 205, 232 }, - { 10, 23, 29, 76, 91, 132, 145, 109, 228, 169, 214 } + { 1, 158, 215, 239, 192, 188, 234, 174, 253, 219, 230 }, + { 130, 149, 210, 238, 191, 188, 233, 174, 253, 221, 240 }, + { 59, 123, 193, 237, 188, 187, 232, 174, 252, 220, 246 }, + { 22, 89, 154, 232, 172, 183, 233, 173, 253, 219, 237 }, + { 4, 49, 83, 193, 
128, 160, 227, 161, 253, 219, 233 }, + { 1, 18, 27, 87, 90, 133, 160, 112, 242, 185, 231 } }, { /* Coeff Band 2 */ - { 1, 69, 198, 223, 179, 177, 225, 154, 251, 208, 227 }, - { 78, 78, 170, 223, 170, 179, 218, 162, 248, 203, 245 }, - { 26, 69, 117, 209, 154, 170, 215, 160, 249, 205, 239 }, - { 16, 54, 79, 180, 119, 156, 208, 151, 248, 201, 238 }, - { 12, 43, 45, 119, 102, 142, 186, 126, 245, 193, 236 }, - { 1, 24, 22, 60, 92, 133, 114, 99, 221, 154, 210 } + { 1, 87, 205, 244, 192, 193, 239, 188, 252, 220, 217 }, + { 64, 93, 169, 237, 175, 186, 237, 184, 253, 222, 235 }, + { 19, 77, 130, 222, 154, 175, 231, 173, 253, 221, 223 }, + { 6, 59, 95, 196, 132, 162, 223, 160, 251, 215, 240 }, + { 1, 37, 57, 144, 109, 146, 201, 135, 250, 205, 238 }, + { 1, 17, 26, 81, 94, 138, 135, 107, 232, 168, 223 } }, { /* Coeff Band 3 */ - { 1, 135, 214, 222, 183, 178, 230, 144, 252, 208, 241 }, - { 107, 122, 201, 229, 181, 182, 221, 165, 250, 202, 243 }, - { 38, 100, 168, 221, 168, 176, 220, 166, 250, 208, 240 }, - { 21, 83, 125, 206, 149, 167, 217, 160, 250, 209, 238 }, - { 16, 65, 80, 164, 122, 156, 208, 139, 250, 206, 246 }, - { 3, 37, 43, 104, 103, 143, 156, 118, 237, 173, 227 } + { 1, 150, 219, 243, 198, 192, 237, 182, 253, 227, 245 }, + { 88, 130, 202, 239, 190, 188, 236, 180, 253, 224, 255 }, + { 25, 103, 172, 231, 175, 182, 234, 174, 253, 227, 248 }, + { 7, 78, 128, 215, 156, 172, 228, 166, 252, 222, 248 }, + { 1, 48, 76, 175, 121, 155, 212, 149, 251, 213, 237 }, + { 1, 22, 35, 101, 97, 141, 161, 120, 236, 181, 213 } }, { /* Coeff Band 4 */ - { 1, 169, 223, 233, 193, 184, 234, 150, 254, 206, 243 }, - { 83, 140, 201, 233, 184, 185, 228, 168, 252, 203, 223 }, - { 19, 104, 158, 225, 168, 179, 228, 169, 253, 207, 248 }, - { 10, 76, 117, 209, 145, 168, 223, 166, 252, 210, 243 }, - { 8, 59, 79, 163, 119, 153, 213, 142, 250, 205, 230 }, - { 1, 31, 43, 100, 103, 144, 149, 116, 240, 171, 221 } + { 1, 177, 228, 247, 206, 197, 243, 191, 255, 232, 255 }, + { 76, 143, 205, 243, 192, 192, 
241, 189, 253, 223, 255 }, + { 17, 107, 163, 233, 170, 183, 239, 183, 253, 227, 218 }, + { 3, 75, 118, 216, 147, 171, 234, 174, 253, 220, 249 }, + { 1, 43, 71, 174, 118, 154, 217, 153, 250, 211, 240 }, + { 1, 19, 31, 93, 93, 136, 154, 116, 235, 178, 228 } }, { /* Coeff Band 5 */ - { 1, 190, 234, 247, 211, 197, 239, 172, 255, 208, 236 }, - { 65, 152, 218, 244, 199, 194, 236, 184, 252, 199, 249 }, - { 17, 109, 173, 237, 179, 186, 235, 183, 250, 205, 255 }, - { 6, 78, 127, 219, 153, 173, 231, 177, 251, 210, 249 }, - { 3, 56, 77, 172, 121, 157, 215, 152, 249, 209, 247 }, - { 1, 29, 38, 96, 97, 144, 152, 114, 239, 169, 243 } + { 1, 192, 230, 251, 215, 205, 245, 201, 254, 229, 255 }, + { 66, 142, 206, 248, 200, 202, 244, 197, 255, 224, 255 }, + { 21, 107, 166, 241, 176, 191, 241, 192, 253, 230, 255 }, + { 5, 79, 129, 221, 150, 173, 237, 178, 254, 226, 255 }, + { 1, 43, 72, 173, 117, 151, 217, 150, 253, 216, 245 }, + { 1, 17, 28, 93, 95, 139, 162, 114, 245, 187, 235 } } }, { /* Inter */ { /* Coeff Band 0 */ - { 223, 71, 225, 221, 176, 169, 242, 165, 248, 216, 201 }, - { 147, 79, 197, 215, 175, 172, 230, 154, 243, 203, 184 }, - { 69, 75, 152, 197, 158, 168, 203, 144, 231, 187, 177 } + { 235, 68, 250, 244, 206, 192, 229, 177, 248, 215, 237 }, + { 169, 88, 225, 235, 191, 184, 222, 170, 246, 205, 237 }, + { 65, 100, 171, 214, 166, 173, 216, 157, 249, 213, 215 } }, { /* Coeff Band 1 */ - { 1, 168, 219, 195, 168, 151, 249, 131, 255, 221, 255 }, - { 152, 156, 226, 210, 189, 173, 240, 121, 255, 215, 238 }, - { 82, 128, 198, 239, 201, 194, 220, 151, 254, 202, 251 }, - { 74, 107, 150, 236, 163, 187, 222, 177, 255, 204, 255 }, - { 59, 103, 120, 181, 125, 148, 232, 157, 255, 219, 245 }, - { 21, 63, 84, 129, 122, 150, 171, 118, 246, 196, 226 } + { 1, 191, 246, 250, 217, 202, 244, 195, 255, 226, 128 }, + { 177, 169, 236, 250, 216, 201, 244, 194, 251, 228, 255 }, + { 70, 132, 205, 250, 209, 205, 246, 193, 254, 246, 255 }, + { 41, 108, 165, 244, 172, 194, 246, 202, 255, 229, 255 }, + { 
23, 84, 126, 207, 140, 162, 244, 179, 254, 237, 255 }, + { 11, 57, 83, 149, 127, 156, 180, 126, 247, 202, 220 } }, { /* Coeff Band 2 */ - { 1, 133, 219, 202, 174, 158, 244, 133, 255, 214, 237 }, - { 101, 132, 204, 221, 187, 183, 225, 131, 253, 201, 247 }, - { 41, 107, 147, 228, 174, 187, 211, 162, 252, 201, 246 }, - { 40, 107, 107, 205, 129, 162, 213, 164, 252, 206, 232 }, - { 24, 140, 90, 122, 111, 141, 210, 127, 251, 208, 239 }, - { 1, 59, 55, 91, 111, 141, 144, 109, 241, 180, 226 } + { 1, 169, 240, 250, 212, 202, 242, 192, 252, 222, 255 }, + { 105, 151, 215, 246, 200, 197, 240, 190, 253, 221, 255 }, + { 24, 111, 166, 237, 177, 188, 236, 183, 252, 213, 255 }, + { 9, 83, 122, 218, 148, 170, 233, 174, 250, 215, 242 }, + { 1, 55, 77, 168, 118, 152, 215, 150, 248, 213, 226 }, + { 1, 26, 36, 104, 98, 146, 149, 116, 235, 182, 225 } }, { /* Coeff Band 3 */ - { 1, 170, 226, 200, 179, 153, 245, 138, 255, 214, 241 }, - { 111, 149, 217, 226, 194, 186, 223, 137, 255, 211, 253 }, - { 40, 113, 174, 228, 180, 183, 211, 165, 255, 212, 247 }, - { 44, 101, 126, 210, 151, 167, 212, 161, 255, 217, 241 }, - { 43, 131, 103, 146, 119, 148, 211, 136, 254, 216, 250 }, - { 1, 57, 63, 112, 116, 145, 158, 115, 249, 193, 236 } + { 1, 191, 243, 251, 219, 204, 246, 196, 255, 230, 128 }, + { 97, 168, 225, 248, 207, 198, 244, 193, 254, 225, 192 }, + { 15, 122, 182, 241, 187, 188, 241, 190, 251, 231, 228 }, + { 3, 83, 131, 226, 160, 178, 237, 180, 251, 222, 205 }, + { 1, 49, 77, 184, 121, 155, 222, 159, 249, 216, 249 }, + { 1, 21, 32, 98, 98, 140, 152, 113, 233, 173, 243 } }, { /* Coeff Band 4 */ - { 1, 186, 233, 216, 191, 163, 241, 143, 255, 210, 255 }, - { 91, 161, 214, 225, 190, 181, 224, 150, 255, 212, 253 }, - { 26, 117, 163, 220, 172, 180, 218, 148, 255, 215, 252 }, - { 27, 90, 122, 203, 143, 167, 212, 159, 255, 213, 255 }, - { 21, 98, 113, 163, 130, 153, 208, 141, 255, 215, 248 }, - { 1, 47, 66, 130, 118, 151, 167, 123, 252, 199, 235 } + { 1, 202, 242, 253, 226, 212, 245, 205, 254, 226, 
255 }, + { 83, 168, 219, 252, 212, 211, 244, 200, 250, 215, 255 }, + { 9, 143, 174, 245, 183, 197, 241, 194, 254, 217, 255 }, + { 1, 105, 129, 228, 154, 179, 233, 179, 253, 211, 255 }, + { 1, 47, 72, 177, 116, 152, 214, 157, 251, 209, 255 }, + { 1, 18, 26, 79, 94, 137, 150, 109, 246, 175, 248 } }, { /* Coeff Band 5 */ - { 1, 195, 236, 245, 211, 195, 238, 171, 255, 209, 248 }, - { 65, 156, 218, 245, 200, 196, 230, 185, 255, 212, 248 }, - { 13, 112, 172, 238, 180, 189, 231, 185, 255, 213, 250 }, - { 6, 83, 130, 224, 155, 177, 227, 180, 255, 214, 244 }, - { 5, 71, 91, 185, 133, 160, 214, 154, 254, 212, 248 }, - { 1, 45, 63, 128, 112, 147, 169, 129, 248, 190, 236 } + { 1, 205, 236, 254, 233, 221, 247, 201, 255, 220, 128 }, + { 87, 149, 205, 254, 211, 219, 245, 207, 255, 239, 128 }, + { 56, 122, 162, 248, 164, 195, 246, 211, 255, 231, 128 }, + { 26, 108, 163, 224, 149, 169, 240, 187, 255, 238, 255 }, + { 1, 54, 89, 171, 123, 152, 219, 148, 254, 226, 255 }, + { 1, 21, 34, 99, 90, 140, 174, 112, 252, 210, 255 } } } } @@ -441,90 +441,90 @@ static const vp9_coeff_probs default_coef_probs_16x16[BLOCK_TYPES] = { }, { /* block Type 1 */ { /* Intra */ { /* Coeff Band 0 */ - { 198, 28, 192, 217, 170, 174, 201, 162, 219, 179, 159 }, - { 96, 36, 145, 198, 153, 167, 193, 153, 222, 180, 177 }, - { 31, 35, 89, 156, 131, 157, 166, 136, 214, 170, 178 } + { 203, 35, 218, 235, 189, 187, 194, 174, 175, 150, 127 }, + { 95, 50, 155, 211, 161, 173, 190, 163, 198, 161, 187 }, + { 21, 46, 93, 178, 130, 157, 200, 151, 224, 186, 191 } }, { /* Coeff Band 1 */ - { 1, 138, 202, 225, 174, 178, 218, 164, 243, 200, 201 }, - { 147, 134, 202, 223, 174, 177, 215, 162, 243, 204, 220 }, - { 65, 115, 179, 224, 176, 177, 215, 162, 243, 202, 227 }, - { 25, 86, 141, 217, 163, 177, 216, 159, 243, 201, 225 }, - { 6, 48, 79, 181, 125, 157, 209, 151, 244, 201, 212 }, - { 1, 16, 25, 77, 91, 134, 132, 112, 210, 162, 180 } + { 1, 155, 198, 236, 183, 187, 223, 175, 250, 209, 255 }, + { 115, 147, 192, 235, 182, 186, 
222, 173, 244, 199, 222 }, + { 43, 124, 174, 234, 178, 186, 222, 176, 249, 201, 255 }, + { 13, 96, 143, 227, 164, 181, 223, 174, 248, 197, 237 }, + { 2, 59, 91, 197, 131, 163, 213, 162, 246, 198, 241 }, + { 1, 19, 29, 85, 96, 139, 128, 116, 215, 153, 204 } }, { /* Coeff Band 2 */ - { 1, 78, 195, 222, 172, 177, 219, 162, 245, 205, 227 }, - { 67, 79, 154, 211, 158, 171, 212, 159, 243, 201, 222 }, - { 18, 63, 108, 192, 140, 163, 205, 152, 242, 197, 214 }, - { 6, 49, 77, 163, 121, 154, 192, 142, 239, 191, 216 }, - { 1, 34, 49, 112, 106, 143, 160, 122, 233, 178, 213 }, - { 1, 14, 20, 56, 93, 135, 94, 102, 189, 141, 170 } + { 1, 91, 180, 231, 170, 180, 237, 181, 248, 213, 230 }, + { 39, 83, 139, 220, 153, 173, 233, 179, 243, 200, 228 }, + { 12, 63, 106, 203, 136, 163, 227, 170, 244, 200, 234 }, + { 5, 48, 79, 178, 123, 154, 215, 155, 244, 197, 232 }, + { 1, 32, 50, 125, 104, 144, 171, 130, 238, 181, 229 }, + { 1, 12, 18, 54, 88, 131, 92, 99, 201, 142, 193 } }, { /* Coeff Band 3 */ - { 1, 137, 210, 229, 182, 181, 223, 164, 247, 214, 201 }, - { 89, 123, 189, 226, 176, 180, 217, 165, 245, 207, 216 }, - { 24, 100, 155, 217, 162, 176, 215, 163, 242, 198, 215 }, - { 8, 78, 121, 199, 147, 167, 206, 155, 241, 198, 212 }, - { 2, 52, 81, 161, 125, 156, 185, 139, 236, 186, 207 }, - { 1, 22, 35, 88, 102, 141, 121, 116, 199, 153, 179 } + { 1, 152, 202, 238, 186, 188, 227, 178, 248, 205, 229 }, + { 63, 125, 183, 234, 178, 184, 225, 179, 248, 205, 228 }, + { 15, 100, 153, 227, 166, 180, 223, 173, 244, 198, 229 }, + { 4, 76, 119, 210, 149, 170, 215, 165, 245, 200, 221 }, + { 1, 46, 73, 165, 120, 154, 192, 144, 241, 189, 225 }, + { 1, 18, 27, 78, 95, 136, 124, 110, 219, 158, 207 } }, { /* Coeff Band 4 */ - { 1, 169, 220, 239, 196, 191, 220, 173, 242, 201, 226 }, - { 64, 139, 195, 231, 183, 184, 215, 169, 240, 196, 211 }, - { 12, 103, 153, 217, 162, 174, 212, 163, 236, 195, 211 }, - { 3, 71, 109, 190, 141, 164, 202, 152, 240, 192, 220 }, - { 1, 38, 61, 139, 114, 149, 175, 133, 233, 183, 
211 }, - { 1, 13, 22, 61, 93, 134, 101, 106, 194, 145, 185 } + { 1, 181, 211, 243, 197, 195, 228, 180, 249, 211, 252 }, + { 40, 138, 189, 237, 184, 189, 226, 178, 249, 208, 247 }, + { 7, 103, 153, 226, 166, 179, 223, 171, 249, 209, 224 }, + { 1, 71, 110, 200, 143, 166, 213, 159, 249, 206, 241 }, + { 1, 37, 60, 144, 111, 150, 189, 135, 245, 196, 232 }, + { 1, 15, 25, 75, 91, 134, 128, 108, 224, 163, 213 } }, { /* Coeff Band 5 */ - { 1, 204, 220, 234, 193, 185, 220, 166, 247, 207, 237 }, - { 42, 139, 187, 221, 174, 177, 215, 161, 246, 201, 242 }, - { 5, 83, 132, 204, 152, 168, 212, 158, 246, 203, 225 }, - { 1, 48, 84, 175, 126, 157, 203, 148, 245, 199, 233 }, - { 1, 24, 46, 123, 103, 142, 178, 128, 243, 189, 235 }, - { 1, 10, 19, 58, 88, 134, 109, 101, 216, 151, 216 } + { 1, 215, 219, 246, 205, 197, 236, 183, 252, 221, 235 }, + { 32, 146, 197, 239, 187, 188, 234, 180, 252, 223, 247 }, + { 6, 100, 150, 227, 167, 178, 233, 178, 252, 219, 233 }, + { 1, 63, 102, 203, 138, 167, 225, 162, 252, 216, 240 }, + { 1, 33, 56, 148, 109, 146, 202, 138, 250, 208, 237 }, + { 1, 15, 25, 75, 90, 131, 138, 108, 236, 171, 235 } } }, { /* Inter */ { /* Coeff Band 0 */ - { 227, 36, 243, 237, 206, 186, 210, 157, 245, 195, 200 }, - { 144, 41, 214, 226, 190, 182, 207, 155, 238, 193, 177 }, - { 63, 37, 153, 199, 162, 169, 193, 145, 227, 187, 152 } + { 228, 37, 245, 229, 199, 183, 200, 146, 240, 188, 223 }, + { 138, 62, 209, 217, 184, 177, 195, 148, 246, 186, 236 }, + { 42, 79, 146, 185, 156, 167, 183, 137, 247, 189, 251 } }, { /* Coeff Band 1 */ - { 1, 170, 247, 248, 213, 201, 239, 188, 238, 203, 255 }, - { 214, 166, 242, 248, 212, 198, 236, 191, 221, 219, 199 }, - { 139, 148, 224, 247, 207, 197, 236, 189, 249, 241, 128 }, - { 102, 127, 195, 244, 190, 198, 235, 189, 239, 202, 228 }, - { 76, 106, 154, 227, 159, 176, 234, 182, 243, 216, 229 }, - { 52, 69, 93, 158, 125, 155, 173, 139, 225, 170, 209 } + { 1, 205, 242, 248, 210, 202, 245, 193, 233, 230, 255 }, + { 191, 185, 234, 249, 210, 201, 
245, 194, 255, 197, 128 }, + { 112, 148, 214, 247, 208, 201, 246, 192, 255, 238, 128 }, + { 76, 120, 182, 246, 190, 198, 246, 202, 255, 244, 128 }, + { 51, 95, 145, 232, 156, 177, 246, 199, 255, 233, 128 }, + { 47, 71, 104, 195, 129, 158, 230, 167, 253, 224, 255 } }, { /* Coeff Band 2 */ - { 1, 139, 241, 245, 205, 193, 230, 177, 239, 198, 183 }, - { 131, 139, 214, 240, 191, 189, 224, 181, 236, 203, 194 }, - { 32, 102, 157, 228, 167, 177, 221, 174, 235, 191, 194 }, - { 12, 75, 112, 201, 142, 163, 208, 161, 227, 180, 200 }, - { 2, 45, 66, 142, 119, 154, 178, 141, 220, 171, 213 }, - { 1, 15, 20, 56, 102, 151, 87, 104, 182, 136, 175 } + { 1, 182, 235, 247, 204, 195, 246, 202, 255, 227, 128 }, + { 104, 145, 204, 243, 189, 191, 242, 199, 255, 229, 128 }, + { 35, 107, 159, 234, 167, 181, 244, 188, 255, 221, 128 }, + { 17, 87, 126, 216, 151, 168, 242, 179, 255, 242, 128 }, + { 4, 68, 91, 182, 131, 154, 222, 153, 255, 228, 128 }, + { 1, 55, 64, 126, 105, 137, 193, 121, 247, 194, 255 } }, { /* Coeff Band 3 */ - { 1, 174, 243, 248, 212, 201, 237, 194, 249, 207, 255 }, - { 134, 155, 223, 244, 200, 195, 230, 184, 248, 189, 233 }, - { 26, 115, 177, 235, 180, 185, 225, 176, 245, 198, 255 }, - { 8, 82, 129, 217, 156, 175, 220, 168, 243, 204, 228 }, - { 3, 48, 75, 165, 122, 155, 193, 145, 245, 189, 199 }, - { 1, 15, 27, 73, 101, 139, 117, 112, 212, 157, 209 } + { 1, 210, 239, 249, 209, 201, 249, 205, 255, 255, 128 }, + { 91, 162, 218, 247, 200, 195, 250, 199, 255, 255, 128 }, + { 16, 116, 173, 242, 184, 190, 251, 193, 255, 205, 128 }, + { 5, 85, 133, 228, 156, 178, 244, 184, 255, 251, 128 }, + { 1, 55, 83, 196, 125, 164, 236, 168, 249, 249, 255 }, + { 1, 24, 39, 127, 92, 154, 183, 133, 255, 192, 128 } }, { /* Coeff Band 4 */ - { 1, 191, 244, 248, 214, 200, 229, 185, 249, 207, 255 }, - { 106, 167, 221, 242, 198, 192, 223, 178, 245, 202, 246 }, - { 13, 117, 169, 229, 175, 182, 220, 170, 244, 202, 226 }, - { 2, 74, 114, 203, 143, 170, 211, 160, 248, 199, 232 }, - { 1, 35, 58, 141, 
111, 144, 184, 132, 244, 196, 239 }, - { 1, 12, 22, 66, 91, 138, 114, 102, 225, 156, 214 } + { 1, 225, 242, 252, 218, 205, 251, 207, 255, 255, 128 }, + { 67, 174, 223, 249, 205, 199, 250, 210, 255, 234, 128 }, + { 10, 119, 177, 243, 186, 187, 253, 199, 255, 255, 128 }, + { 2, 81, 129, 228, 154, 177, 244, 193, 255, 251, 128 }, + { 1, 48, 78, 193, 122, 152, 240, 171, 255, 240, 128 }, + { 1, 19, 43, 116, 96, 128, 195, 135, 255, 234, 128 } }, { /* Coeff Band 5 */ - { 1, 220, 231, 246, 203, 196, 239, 188, 255, 212, 255 }, - { 42, 155, 203, 241, 189, 191, 235, 184, 253, 220, 255 }, - { 4, 95, 151, 230, 167, 182, 234, 178, 252, 217, 243 }, - { 1, 61, 105, 206, 140, 168, 226, 167, 250, 215, 242 }, - { 1, 31, 60, 151, 109, 148, 204, 142, 250, 208, 230 }, - { 1, 13, 26, 76, 93, 132, 139, 106, 236, 171, 237 } + { 1, 237, 210, 255, 213, 219, 255, 235, 255, 219, 128 }, + { 49, 163, 203, 252, 182, 198, 255, 235, 255, 255, 128 }, + { 23, 114, 156, 247, 196, 187, 255, 238, 255, 255, 128 }, + { 6, 71, 124, 248, 163, 202, 253, 203, 255, 255, 128 }, + { 1, 35, 74, 226, 160, 162, 246, 189, 255, 244, 128 }, + { 1, 16, 19, 136, 92, 164, 237, 108, 255, 255, 128 } } } } }; -static const vp9_coeff_probs default_coef_probs_32x32[BLOCK_TYPES_32X32] = { +static const vp9_coeff_probs default_coef_probs_32x32[BLOCK_TYPES] = { { /* block Type 0 */ { /* Intra */ { /* Coeff Band 0 */ @@ -609,5 +609,89 @@ static const vp9_coeff_probs default_coef_probs_32x32[BLOCK_TYPES_32X32] = { { 1, 9, 16, 48, 89, 134, 89, 99, 183, 140, 169 } } } + }, { /* block Type 1 */ + { /* Intra */ + { /* Coeff Band 0 */ + { 176, 22, 201, 227, 185, 189, 160, 172, 115, 141, 105 }, + { 64, 33, 120, 195, 149, 171, 170, 150, 182, 175, 139 }, + { 12, 33, 68, 151, 118, 153, 172, 138, 202, 175, 153 } + }, { /* Coeff Band 1 */ + { 1, 125, 175, 228, 163, 176, 215, 171, 226, 193, 165 }, + { 127, 126, 174, 224, 163, 177, 212, 167, 225, 175, 235 }, + { 57, 114, 159, 223, 166, 175, 216, 167, 234, 182, 211 }, + { 23, 93, 133, 215, 150, 
174, 216, 171, 233, 174, 176 }, + { 4, 56, 84, 178, 127, 157, 209, 149, 233, 197, 194 }, + { 1, 19, 26, 70, 93, 136, 114, 108, 193, 150, 167 } + }, { /* Coeff Band 2 */ + { 1, 76, 172, 217, 161, 172, 216, 165, 240, 188, 226 }, + { 41, 73, 136, 208, 152, 168, 214, 163, 233, 189, 248 }, + { 14, 59, 102, 195, 137, 163, 209, 158, 227, 184, 204 }, + { 4, 45, 75, 168, 122, 153, 197, 148, 231, 193, 178 }, + { 1, 33, 48, 118, 106, 148, 154, 126, 221, 168, 211 }, + { 1, 12, 16, 42, 90, 143, 61, 94, 159, 122, 167 } + }, { /* Coeff Band 3 */ + { 1, 134, 186, 226, 173, 180, 208, 172, 220, 179, 205 }, + { 60, 114, 164, 219, 166, 177, 207, 166, 231, 176, 208 }, + { 18, 90, 134, 208, 152, 175, 200, 164, 225, 181, 199 }, + { 7, 67, 102, 189, 139, 164, 192, 155, 225, 172, 209 }, + { 1, 39, 59, 137, 116, 151, 160, 132, 222, 166, 212 }, + { 1, 12, 17, 50, 93, 134, 82, 102, 181, 131, 190 } + }, { /* Coeff Band 4 */ + { 1, 160, 195, 229, 180, 185, 204, 163, 243, 185, 223 }, + { 31, 124, 170, 221, 170, 179, 201, 164, 240, 183, 223 }, + { 5, 91, 134, 204, 154, 170, 191, 155, 236, 178, 232 }, + { 1, 62, 95, 173, 135, 159, 180, 145, 234, 179, 225 }, + { 1, 30, 48, 116, 109, 147, 152, 123, 231, 170, 224 }, + { 1, 11, 17, 53, 90, 133, 93, 102, 201, 139, 202 } + }, { /* Coeff Band 5 */ + { 1, 215, 203, 233, 186, 183, 226, 170, 249, 213, 225 }, + { 13, 133, 175, 224, 170, 178, 224, 167, 250, 212, 235 }, + { 1, 83, 127, 209, 151, 169, 221, 162, 251, 212, 243 }, + { 1, 53, 85, 182, 127, 157, 213, 153, 250, 210, 234 }, + { 1, 30, 47, 131, 103, 143, 190, 132, 248, 200, 240 }, + { 1, 14, 21, 67, 89, 129, 126, 104, 232, 167, 223 } + } + }, { /* Inter */ + { /* Coeff Band 0 */ + { 193, 35, 239, 239, 206, 194, 195, 152, 244, 200, 255 }, + { 77, 57, 198, 224, 192, 187, 181, 145, 242, 190, 248 }, + { 21, 54, 149, 197, 172, 171, 169, 138, 238, 178, 241 } + }, { /* Coeff Band 1 */ + { 1, 227, 241, 247, 195, 195, 245, 199, 255, 255, 128 }, + { 189, 223, 237, 249, 199, 200, 238, 198, 255, 255, 128 }, + { 
125, 204, 226, 247, 198, 199, 251, 213, 255, 255, 128 }, + { 101, 167, 207, 246, 193, 201, 245, 168, 255, 255, 128 }, + { 89, 121, 174, 237, 169, 184, 246, 204, 255, 255, 128 }, + { 71, 79, 135, 216, 149, 170, 234, 168, 255, 226, 128 } + }, { /* Coeff Band 2 */ + { 1, 207, 235, 250, 220, 204, 250, 201, 255, 255, 128 }, + { 103, 160, 210, 245, 195, 188, 249, 195, 255, 255, 128 }, + { 33, 130, 165, 234, 168, 183, 253, 199, 255, 255, 128 }, + { 10, 113, 138, 223, 146, 180, 248, 199, 255, 255, 128 }, + { 1, 88, 104, 172, 112, 174, 221, 126, 255, 217, 128 }, + { 1, 87, 70, 160, 68, 140, 171, 85, 255, 85, 128 } + }, { /* Coeff Band 3 */ + { 1, 230, 240, 249, 209, 200, 243, 199, 255, 228, 128 }, + { 60, 178, 218, 247, 203, 200, 247, 198, 255, 255, 128 }, + { 8, 119, 162, 241, 188, 185, 252, 202, 255, 255, 128 }, + { 2, 78, 119, 218, 149, 162, 247, 184, 255, 255, 128 }, + { 1, 48, 81, 172, 142, 148, 239, 140, 255, 239, 128 }, + { 1, 29, 23, 82, 96, 102, 181, 149, 255, 255, 128 } + }, { /* Coeff Band 4 */ + { 1, 240, 241, 250, 216, 203, 248, 188, 255, 255, 128 }, + { 60, 180, 222, 247, 202, 195, 247, 191, 255, 255, 128 }, + { 9, 120, 169, 240, 190, 189, 249, 181, 255, 255, 128 }, + { 2, 85, 126, 223, 154, 178, 240, 184, 255, 255, 128 }, + { 1, 47, 90, 198, 132, 158, 233, 162, 255, 224, 128 }, + { 1, 33, 34, 143, 116, 156, 217, 128, 255, 255, 128 } + }, { /* Coeff Band 5 */ + { 1, 250, 193, 249, 188, 193, 255, 236, 255, 255, 128 }, + { 35, 187, 185, 247, 154, 184, 255, 247, 255, 171, 128 }, + { 20, 132, 114, 223, 172, 165, 255, 229, 255, 255, 128 }, + { 4, 97, 96, 218, 96, 162, 255, 164, 255, 253, 128 }, + { 1, 57, 35, 197, 154, 173, 254, 215, 255, 255, 128 }, + { 1, 8, 2, 161, 10, 57, 230, 228, 255, 171, 128 } + } + } } }; diff --git a/vp9/common/vp9_entropy.c b/vp9/common/vp9_entropy.c index 759b90128..1e3a7e17e 100644 --- a/vp9/common/vp9_entropy.c +++ b/vp9/common/vp9_entropy.c @@ -336,6 +336,6 @@ void vp9_adapt_coef_probs(VP9_COMMON *cm) { BLOCK_TYPES, 
cm->fc.coef_counts_16x16, count_sat, update_factor); update_coef_probs(cm->fc.coef_probs_32x32, cm->fc.pre_coef_probs_32x32, - BLOCK_TYPES_32X32, cm->fc.coef_counts_32x32, + BLOCK_TYPES, cm->fc.coef_counts_32x32, count_sat, update_factor); } diff --git a/vp9/common/vp9_entropy.h b/vp9/common/vp9_entropy.h index 20559a79b..8d28b0058 100644 --- a/vp9/common/vp9_entropy.h +++ b/vp9/common/vp9_entropy.h @@ -61,7 +61,6 @@ extern vp9_extra_bit_struct vp9_extra_bits[12]; /* indexed by token value */ /* Outside dimension. 0 = Y with DC, 1 = UV */ #define BLOCK_TYPES 2 -#define BLOCK_TYPES_32X32 1 #define REF_TYPES 2 // intra=0, inter=1 /* Middle dimension reflects the coefficient position within the transform. */ @@ -110,12 +109,24 @@ extern DECLARE_ALIGNED(16, const int, vp9_default_zig_zag1d_32x32[1024]); void vp9_coef_tree_initialize(void); void vp9_adapt_coef_probs(struct VP9Common *); -static void vp9_reset_mb_tokens_context(MACROBLOCKD* const xd) { +static INLINE void vp9_reset_mb_tokens_context(MACROBLOCKD* const xd) { /* Clear entropy contexts */ vpx_memset(xd->above_context, 0, sizeof(ENTROPY_CONTEXT_PLANES)); vpx_memset(xd->left_context, 0, sizeof(ENTROPY_CONTEXT_PLANES)); } +static INLINE void vp9_reset_sb_tokens_context(MACROBLOCKD* const xd) { + /* Clear entropy contexts */ + vpx_memset(xd->above_context, 0, sizeof(ENTROPY_CONTEXT_PLANES) * 2); + vpx_memset(xd->left_context, 0, sizeof(ENTROPY_CONTEXT_PLANES) * 2); +} + +static INLINE void vp9_reset_sb64_tokens_context(MACROBLOCKD* const xd) { + /* Clear entropy contexts */ + vpx_memset(xd->above_context, 0, sizeof(ENTROPY_CONTEXT_PLANES) * 4); + vpx_memset(xd->left_context, 0, sizeof(ENTROPY_CONTEXT_PLANES) * 4); +} + extern const int vp9_coef_bands[32]; extern const int vp9_coef_bands4x4[16]; diff --git a/vp9/common/vp9_idctllm.c b/vp9/common/vp9_idctllm.c index 673abd7b1..54b79ee64 100644 --- a/vp9/common/vp9_idctllm.c +++ b/vp9/common/vp9_idctllm.c @@ -645,7 +645,7 @@ void vp9_short_idct16x16_c(int16_t 
*input, int16_t *output, int pitch) { // First transform rows for (i = 0; i < 16; ++i) { idct16_1d(input, outptr); - input += half_pitch; + input += 16; outptr += 16; } @@ -655,7 +655,7 @@ void vp9_short_idct16x16_c(int16_t *input, int16_t *output, int pitch) { temp_in[j] = out[j * 16 + i]; idct16_1d(temp_in, temp_out); for (j = 0; j < 16; ++j) - output[j * 16 + i] = ROUND_POWER_OF_TWO(temp_out[j], 6); + output[j * half_pitch + i] = ROUND_POWER_OF_TWO(temp_out[j], 6); } } @@ -838,7 +838,7 @@ static const transform_2d IHT_16[] = { }; void vp9_short_iht16x16_c(int16_t *input, int16_t *output, - int input_pitch, TX_TYPE tx_type) { + int pitch, TX_TYPE tx_type) { int i, j; int16_t out[16 * 16]; int16_t *outptr = out; @@ -848,7 +848,7 @@ void vp9_short_iht16x16_c(int16_t *input, int16_t *output, // Rows for (i = 0; i < 16; ++i) { ht.rows(input, outptr); - input += input_pitch; + input += 16; outptr += 16; } @@ -858,7 +858,7 @@ void vp9_short_iht16x16_c(int16_t *input, int16_t *output, temp_in[j] = out[j * 16 + i]; ht.cols(temp_in, temp_out); for (j = 0; j < 16; ++j) - output[j * 16 + i] = ROUND_POWER_OF_TWO(temp_out[j], 6); + output[j * pitch + i] = ROUND_POWER_OF_TWO(temp_out[j], 6); } } @@ -875,7 +875,7 @@ void vp9_short_idct10_16x16_c(int16_t *input, int16_t *output, int pitch) { vpx_memset(out, 0, sizeof(out)); for (i = 0; i < 4; ++i) { idct16_1d(input, outptr); - input += half_pitch; + input += 16; outptr += 16; } @@ -885,7 +885,7 @@ void vp9_short_idct10_16x16_c(int16_t *input, int16_t *output, int pitch) { temp_in[j] = out[j*16 + i]; idct16_1d(temp_in, temp_out); for (j = 0; j < 16; ++j) - output[j*16 + i] = ROUND_POWER_OF_TWO(temp_out[j], 6); + output[j * half_pitch + i] = ROUND_POWER_OF_TWO(temp_out[j], 6); } } @@ -1273,7 +1273,7 @@ void vp9_short_idct32x32_c(int16_t *input, int16_t *output, int pitch) { // Rows for (i = 0; i < 32; ++i) { idct32_1d(input, outptr); - input += half_pitch; + input += 32; outptr += 32; } @@ -1283,7 +1283,7 @@ void 
vp9_short_idct32x32_c(int16_t *input, int16_t *output, int pitch) { temp_in[j] = out[j * 32 + i]; idct32_1d(temp_in, temp_out); for (j = 0; j < 32; ++j) - output[j * 32 + i] = ROUND_POWER_OF_TWO(temp_out[j], 6); + output[j * half_pitch + i] = ROUND_POWER_OF_TWO(temp_out[j], 6); } } @@ -1306,7 +1306,7 @@ void vp9_short_idct10_32x32_c(int16_t *input, int16_t *output, int pitch) { vpx_memset(out, 0, sizeof(out)); for (i = 0; i < 4; ++i) { idct32_1d(input, outptr); - input += half_pitch; + input += 32; outptr += 32; } @@ -1316,6 +1316,6 @@ void vp9_short_idct10_32x32_c(int16_t *input, int16_t *output, int pitch) { temp_in[j] = out[j * 32 + i]; idct32_1d(temp_in, temp_out); for (j = 0; j < 32; ++j) - output[j * 32 + i] = ROUND_POWER_OF_TWO(temp_out[j], 6); + output[j * half_pitch + i] = ROUND_POWER_OF_TWO(temp_out[j], 6); } } diff --git a/vp9/common/vp9_invtrans.c b/vp9/common/vp9_invtrans.c index 1311b9111..a26415fc3 100644 --- a/vp9/common/vp9_invtrans.c +++ b/vp9/common/vp9_invtrans.c @@ -11,12 +11,13 @@ #include "vp9/common/vp9_invtrans.h" #include "./vp9_rtcd.h" -void vp9_inverse_transform_b_4x4(MACROBLOCKD *xd, int block, int pitch) { - BLOCKD *b = &xd->block[block]; - if (xd->eobs[block] <= 1) - xd->inv_txm4x4_1(b->dqcoeff, b->diff, pitch); +void vp9_inverse_transform_b_4x4(MACROBLOCKD *xd, int eob, + int16_t *dqcoeff, int16_t *diff, + int pitch) { + if (eob <= 1) + xd->inv_txm4x4_1(dqcoeff, diff, pitch); else - xd->inv_txm4x4(b->dqcoeff, b->diff, pitch); + xd->inv_txm4x4(dqcoeff, diff, pitch); } void vp9_inverse_transform_mby_4x4(MACROBLOCKD *xd) { @@ -27,7 +28,8 @@ void vp9_inverse_transform_mby_4x4(MACROBLOCKD *xd) { if (tx_type != DCT_DCT) { vp9_short_iht4x4(xd->block[i].dqcoeff, xd->block[i].diff, 16, tx_type); } else { - vp9_inverse_transform_b_4x4(xd, i, 32); + vp9_inverse_transform_b_4x4(xd, xd->eobs[i], xd->block[i].dqcoeff, + xd->block[i].diff, 32); } } } @@ -36,7 +38,8 @@ void vp9_inverse_transform_mbuv_4x4(MACROBLOCKD *xd) { int i; for (i = 16; i < 
24; i++) { - vp9_inverse_transform_b_4x4(xd, i, 16); + vp9_inverse_transform_b_4x4(xd, xd->eobs[i], xd->block[i].dqcoeff, + xd->block[i].diff, 16); } } @@ -111,13 +114,170 @@ void vp9_inverse_transform_mb_16x16(MACROBLOCKD *xd) { vp9_inverse_transform_mbuv_8x8(xd); } -void vp9_inverse_transform_sby_32x32(SUPERBLOCKD *xd_sb) { - vp9_short_idct32x32(xd_sb->dqcoeff, xd_sb->diff, 64); +void vp9_inverse_transform_sby_32x32(MACROBLOCKD *xd) { + vp9_short_idct32x32(xd->dqcoeff, xd->diff, 64); } -void vp9_inverse_transform_sbuv_16x16(SUPERBLOCKD *xd_sb) { - vp9_inverse_transform_b_16x16(xd_sb->dqcoeff + 1024, - xd_sb->diff + 1024, 32); - vp9_inverse_transform_b_16x16(xd_sb->dqcoeff + 1280, - xd_sb->diff + 1280, 32); +void vp9_inverse_transform_sby_16x16(MACROBLOCKD *xd) { + int n; + + for (n = 0; n < 4; n++) { + const int x_idx = n & 1, y_idx = n >> 1; + + vp9_inverse_transform_b_16x16(xd->dqcoeff + n * 256, + xd->diff + x_idx * 16 + y_idx * 32 * 16, 64); + } +} + +void vp9_inverse_transform_sby_8x8(MACROBLOCKD *xd) { + int n; + + for (n = 0; n < 16; n++) { + const int x_idx = n & 3, y_idx = n >> 2; + + vp9_inverse_transform_b_8x8(xd->dqcoeff + n * 64, + xd->diff + x_idx * 8 + y_idx * 32 * 8, 64); + } +} + +void vp9_inverse_transform_sby_4x4(MACROBLOCKD *xd) { + int n; + + for (n = 0; n < 64; n++) { + const int x_idx = n & 7, y_idx = n >> 3; + + vp9_inverse_transform_b_4x4(xd, xd->eobs[n], xd->dqcoeff + n * 16, + xd->diff + x_idx * 4 + y_idx * 4 * 32, 64); + } +} + +void vp9_inverse_transform_sbuv_16x16(MACROBLOCKD *xd) { + vp9_inverse_transform_b_16x16(xd->dqcoeff + 1024, + xd->diff + 1024, 32); + vp9_inverse_transform_b_16x16(xd->dqcoeff + 1280, + xd->diff + 1280, 32); +} + +void vp9_inverse_transform_sbuv_8x8(MACROBLOCKD *xd) { + int n; + + for (n = 0; n < 4; n++) { + const int x_idx = n & 1, y_idx = n >> 1; + + vp9_inverse_transform_b_8x8(xd->dqcoeff + 1024 + n * 64, + xd->diff + 1024 + x_idx * 8 + y_idx * 16 * 8, + 32); + vp9_inverse_transform_b_8x8(xd->dqcoeff + 1280 
+ n * 64, + xd->diff + 1280 + x_idx * 8 + y_idx * 16 * 8, + 32); + } +} + +void vp9_inverse_transform_sbuv_4x4(MACROBLOCKD *xd) { + int n; + + for (n = 0; n < 16; n++) { + const int x_idx = n & 3, y_idx = n >> 2; + + vp9_inverse_transform_b_4x4(xd, xd->eobs[64 + n], + xd->dqcoeff + 1024 + n * 16, + xd->diff + 1024 + x_idx * 4 + y_idx * 16 * 4, + 32); + vp9_inverse_transform_b_4x4(xd, xd->eobs[64 + 16 + n], + xd->dqcoeff + 1280 + n * 16, + xd->diff + 1280 + x_idx * 4 + y_idx * 16 * 4, + 32); + } +} + +void vp9_inverse_transform_sb64y_32x32(MACROBLOCKD *xd) { + int n; + + for (n = 0; n < 4; n++) { + const int x_idx = n & 1, y_idx = n >> 1; + + vp9_short_idct32x32(xd->dqcoeff + n * 1024, + xd->diff + x_idx * 32 + y_idx * 32 * 64, 128); + } +} + +void vp9_inverse_transform_sb64y_16x16(MACROBLOCKD *xd) { + int n; + + for (n = 0; n < 16; n++) { + const int x_idx = n & 3, y_idx = n >> 2; + + vp9_inverse_transform_b_16x16(xd->dqcoeff + n * 256, + xd->diff + x_idx * 16 + y_idx * 64 * 16, 128); + } +} + +void vp9_inverse_transform_sb64y_8x8(MACROBLOCKD *xd) { + int n; + + for (n = 0; n < 64; n++) { + const int x_idx = n & 7, y_idx = n >> 3; + + vp9_inverse_transform_b_8x8(xd->dqcoeff + n * 64, + xd->diff + x_idx * 8 + y_idx * 64 * 8, 128); + } +} + +void vp9_inverse_transform_sb64y_4x4(MACROBLOCKD *xd) { + int n; + + for (n = 0; n < 256; n++) { + const int x_idx = n & 15, y_idx = n >> 4; + + vp9_inverse_transform_b_4x4(xd, xd->eobs[n], xd->dqcoeff + n * 16, + xd->diff + x_idx * 4 + y_idx * 4 * 64, 128); + } +} + +void vp9_inverse_transform_sb64uv_32x32(MACROBLOCKD *xd) { + vp9_short_idct32x32(xd->dqcoeff + 4096, + xd->diff + 4096, 64); + vp9_short_idct32x32(xd->dqcoeff + 4096 + 1024, + xd->diff + 4096 + 1024, 64); +} + +void vp9_inverse_transform_sb64uv_16x16(MACROBLOCKD *xd) { + int n; + + for (n = 0; n < 4; n++) { + const int x_idx = n & 1, y_idx = n >> 1, off = x_idx * 16 + y_idx * 32 * 16; + + vp9_inverse_transform_b_16x16(xd->dqcoeff + 4096 + n * 256, + xd->diff + 4096 
+ off, 64); + vp9_inverse_transform_b_16x16(xd->dqcoeff + 4096 + 1024 + n * 256, + xd->diff + 4096 + 1024 + off, 64); + } +} + +void vp9_inverse_transform_sb64uv_8x8(MACROBLOCKD *xd) { + int n; + + for (n = 0; n < 16; n++) { + const int x_idx = n & 3, y_idx = n >> 2, off = x_idx * 8 + y_idx * 32 * 8; + + vp9_inverse_transform_b_8x8(xd->dqcoeff + 4096 + n * 64, + xd->diff + 4096 + off, 64); + vp9_inverse_transform_b_8x8(xd->dqcoeff + 4096 + 1024 + n * 64, + xd->diff + 4096 + 1024 + off, 64); + } +} + +void vp9_inverse_transform_sb64uv_4x4(MACROBLOCKD *xd) { + int n; + + for (n = 0; n < 64; n++) { + const int x_idx = n & 7, y_idx = n >> 3, off = x_idx * 4 + y_idx * 32 * 4; + + vp9_inverse_transform_b_4x4(xd, xd->eobs[256 + n], + xd->dqcoeff + 4096 + n * 16, + xd->diff + 4096 + off, 64); + vp9_inverse_transform_b_4x4(xd, xd->eobs[256 + 64 + n], + xd->dqcoeff + 4096 + 1024 + n * 16, + xd->diff + 4096 + 1024 + off, 64); + } } diff --git a/vp9/common/vp9_invtrans.h b/vp9/common/vp9_invtrans.h index abd5b0fad..89916570d 100644 --- a/vp9/common/vp9_invtrans.h +++ b/vp9/common/vp9_invtrans.h @@ -15,7 +15,9 @@ #include "vpx/vpx_integer.h" #include "vp9/common/vp9_blockd.h" -void vp9_inverse_transform_b_4x4(MACROBLOCKD *xd, int block, int pitch); +void vp9_inverse_transform_b_4x4(MACROBLOCKD *xd, int eob, + int16_t *dqcoeff, int16_t *diff, + int pitch); void vp9_inverse_transform_mb_4x4(MACROBLOCKD *xd); @@ -39,7 +41,21 @@ void vp9_inverse_transform_mb_16x16(MACROBLOCKD *xd); void vp9_inverse_transform_mby_16x16(MACROBLOCKD *xd); -void vp9_inverse_transform_sby_32x32(SUPERBLOCKD *xd_sb); -void vp9_inverse_transform_sbuv_16x16(SUPERBLOCKD *xd_sb); +void vp9_inverse_transform_sby_32x32(MACROBLOCKD *xd); +void vp9_inverse_transform_sby_16x16(MACROBLOCKD *xd); +void vp9_inverse_transform_sby_8x8(MACROBLOCKD *xd); +void vp9_inverse_transform_sby_4x4(MACROBLOCKD *xd); +void vp9_inverse_transform_sbuv_16x16(MACROBLOCKD *xd); +void vp9_inverse_transform_sbuv_8x8(MACROBLOCKD *xd); 
+void vp9_inverse_transform_sbuv_4x4(MACROBLOCKD *xd); + +void vp9_inverse_transform_sb64y_32x32(MACROBLOCKD *xd); +void vp9_inverse_transform_sb64y_16x16(MACROBLOCKD *xd); +void vp9_inverse_transform_sb64y_8x8(MACROBLOCKD *xd); +void vp9_inverse_transform_sb64y_4x4(MACROBLOCKD *xd); +void vp9_inverse_transform_sb64uv_32x32(MACROBLOCKD *xd); +void vp9_inverse_transform_sb64uv_16x16(MACROBLOCKD *xd); +void vp9_inverse_transform_sb64uv_8x8(MACROBLOCKD *xd); +void vp9_inverse_transform_sb64uv_4x4(MACROBLOCKD *xd); #endif // VP9_COMMON_VP9_INVTRANS_H_ diff --git a/vp9/common/vp9_onyxc_int.h b/vp9/common/vp9_onyxc_int.h index c4bb12340..48d19a332 100644 --- a/vp9/common/vp9_onyxc_int.h +++ b/vp9/common/vp9_onyxc_int.h @@ -61,7 +61,7 @@ typedef struct frame_contexts { vp9_coeff_probs coef_probs_4x4[BLOCK_TYPES]; vp9_coeff_probs coef_probs_8x8[BLOCK_TYPES]; vp9_coeff_probs coef_probs_16x16[BLOCK_TYPES]; - vp9_coeff_probs coef_probs_32x32[BLOCK_TYPES_32X32]; + vp9_coeff_probs coef_probs_32x32[BLOCK_TYPES]; nmv_context nmvc; nmv_context pre_nmvc; @@ -83,12 +83,12 @@ typedef struct frame_contexts { vp9_coeff_probs pre_coef_probs_4x4[BLOCK_TYPES]; vp9_coeff_probs pre_coef_probs_8x8[BLOCK_TYPES]; vp9_coeff_probs pre_coef_probs_16x16[BLOCK_TYPES]; - vp9_coeff_probs pre_coef_probs_32x32[BLOCK_TYPES_32X32]; + vp9_coeff_probs pre_coef_probs_32x32[BLOCK_TYPES]; vp9_coeff_count coef_counts_4x4[BLOCK_TYPES]; vp9_coeff_count coef_counts_8x8[BLOCK_TYPES]; vp9_coeff_count coef_counts_16x16[BLOCK_TYPES]; - vp9_coeff_count coef_counts_32x32[BLOCK_TYPES_32X32]; + vp9_coeff_count coef_counts_32x32[BLOCK_TYPES]; nmv_context_counts NMVcount; vp9_prob switchable_interp_prob[VP9_SWITCHABLE_FILTERS + 1] diff --git a/vp9/common/vp9_recon.c b/vp9/common/vp9_recon.c index caf7b8d22..d67b6d3df 100644 --- a/vp9/common/vp9_recon.c +++ b/vp9/common/vp9_recon.c @@ -117,7 +117,7 @@ void vp9_recon_mbuv_s_c(MACROBLOCKD *xd, uint8_t *udst, uint8_t *vdst) { void vp9_recon_sby_s_c(MACROBLOCKD *xd, uint8_t 
*dst) { int x, y, stride = xd->block[0].dst_stride; - int16_t *diff = xd->sb_coeff_data.diff; + int16_t *diff = xd->diff; for (y = 0; y < 32; y++) { for (x = 0; x < 32; x++) { @@ -130,8 +130,8 @@ void vp9_recon_sby_s_c(MACROBLOCKD *xd, uint8_t *dst) { void vp9_recon_sbuv_s_c(MACROBLOCKD *xd, uint8_t *udst, uint8_t *vdst) { int x, y, stride = xd->block[16].dst_stride; - int16_t *udiff = xd->sb_coeff_data.diff + 1024; - int16_t *vdiff = xd->sb_coeff_data.diff + 1280; + int16_t *udiff = xd->diff + 1024; + int16_t *vdiff = xd->diff + 1280; for (y = 0; y < 16; y++) { for (x = 0; x < 16; x++) { @@ -145,6 +145,36 @@ void vp9_recon_sbuv_s_c(MACROBLOCKD *xd, uint8_t *udst, uint8_t *vdst) { } } +void vp9_recon_sb64y_s_c(MACROBLOCKD *xd, uint8_t *dst) { + int x, y, stride = xd->block[0].dst_stride; + int16_t *diff = xd->diff; + + for (y = 0; y < 64; y++) { + for (x = 0; x < 64; x++) { + dst[x] = clip_pixel(dst[x] + diff[x]); + } + dst += stride; + diff += 64; + } +} + +void vp9_recon_sb64uv_s_c(MACROBLOCKD *xd, uint8_t *udst, uint8_t *vdst) { + int x, y, stride = xd->block[16].dst_stride; + int16_t *udiff = xd->diff + 4096; + int16_t *vdiff = xd->diff + 4096 + 1024; + + for (y = 0; y < 32; y++) { + for (x = 0; x < 32; x++) { + udst[x] = clip_pixel(udst[x] + udiff[x]); + vdst[x] = clip_pixel(vdst[x] + vdiff[x]); + } + udst += stride; + vdst += stride; + udiff += 32; + vdiff += 32; + } +} + void vp9_recon_mby_c(MACROBLOCKD *xd) { int i; diff --git a/vp9/common/vp9_rtcd_defs.sh b/vp9/common/vp9_rtcd_defs.sh index e6dcff4d1..db1b4673a 100644 --- a/vp9/common/vp9_rtcd_defs.sh +++ b/vp9/common/vp9_rtcd_defs.sh @@ -97,6 +97,12 @@ specialize vp9_recon_sby_s prototype void vp9_recon_sbuv_s "struct macroblockd *x, uint8_t *udst, uint8_t *vdst" specialize void vp9_recon_sbuv_s +prototype void vp9_recon_sb64y_s "struct macroblockd *x, uint8_t *dst" +specialize vp9_recon_sb64y_s + +prototype void vp9_recon_sb64uv_s "struct macroblockd *x, uint8_t *udst, uint8_t *vdst" +specialize void 
vp9_recon_sb64uv_s + prototype void vp9_build_intra_predictors_mby_s "struct macroblockd *x" specialize vp9_build_intra_predictors_mby_s diff --git a/vp9/decoder/vp9_decodframe.c b/vp9/decoder/vp9_decodframe.c index 86806d2d0..055e97b92 100644 --- a/vp9/decoder/vp9_decodframe.c +++ b/vp9/decoder/vp9_decodframe.c @@ -452,125 +452,12 @@ static void decode_4x4(VP9D_COMP *pbi, MACROBLOCKD *xd, } } -static void decode_16x16_sb(VP9D_COMP *pbi, MACROBLOCKD *xd, - BOOL_DECODER* const bc, int n, - int maska, int shiftb) { - int x_idx = n & maska, y_idx = n >> shiftb; - TX_TYPE tx_type = get_tx_type_16x16(xd, &xd->block[0]); - if (tx_type != DCT_DCT) { - vp9_ht_dequant_idct_add_16x16_c( - tx_type, xd->qcoeff, xd->block[0].dequant, - xd->dst.y_buffer + y_idx * 16 * xd->dst.y_stride + x_idx * 16, - xd->dst.y_buffer + y_idx * 16 * xd->dst.y_stride + x_idx * 16, - xd->dst.y_stride, xd->dst.y_stride, xd->eobs[0]); - } else { - vp9_dequant_idct_add_16x16( - xd->qcoeff, xd->block[0].dequant, - xd->dst.y_buffer + y_idx * 16 * xd->dst.y_stride + x_idx * 16, - xd->dst.y_buffer + y_idx * 16 * xd->dst.y_stride + x_idx * 16, - xd->dst.y_stride, xd->dst.y_stride, xd->eobs[0]); - } - vp9_dequant_idct_add_uv_block_8x8_inplace_c( - xd->qcoeff + 16 * 16, - xd->block[16].dequant, - xd->dst.u_buffer + y_idx * 8 * xd->dst.uv_stride + x_idx * 8, - xd->dst.v_buffer + y_idx * 8 * xd->dst.uv_stride + x_idx * 8, - xd->dst.uv_stride, xd); -}; - -static void decode_8x8_sb(VP9D_COMP *pbi, MACROBLOCKD *xd, - BOOL_DECODER* const bc, int n, - int maska, int shiftb) { - int x_idx = n & maska, y_idx = n >> shiftb; - TX_TYPE tx_type = get_tx_type_8x8(xd, &xd->block[0]); - if (tx_type != DCT_DCT) { - int i; - for (i = 0; i < 4; i++) { - int ib = vp9_i8x8_block[i]; - int idx = (ib & 0x02) ? 
(ib + 2) : ib; - int16_t *q = xd->block[idx].qcoeff; - int16_t *dq = xd->block[0].dequant; - int stride = xd->dst.y_stride; - tx_type = get_tx_type_8x8(xd, &xd->block[ib]); - if (tx_type != DCT_DCT) { - vp9_ht_dequant_idct_add_8x8_c( - tx_type, q, dq, - xd->dst.y_buffer + (y_idx * 16 + (i / 2) * 8) * xd->dst.y_stride - + x_idx * 16 + (i & 1) * 8, - xd->dst.y_buffer + (y_idx * 16 + (i / 2) * 8) * xd->dst.y_stride - + x_idx * 16 + (i & 1) * 8, - stride, stride, xd->eobs[idx]); - } else { - vp9_dequant_idct_add_8x8_c( - q, dq, - xd->dst.y_buffer + (y_idx * 16 + (i / 2) * 8) * xd->dst.y_stride - + x_idx * 16 + (i & 1) * 8, - xd->dst.y_buffer + (y_idx * 16 + (i / 2) * 8) * xd->dst.y_stride - + x_idx * 16 + (i & 1) * 8, - stride, stride, xd->eobs[idx]); - } - } - } else { - vp9_dequant_idct_add_y_block_8x8_inplace_c( - xd->qcoeff, xd->block[0].dequant, - xd->dst.y_buffer + y_idx * 16 * xd->dst.y_stride + x_idx * 16, - xd->dst.y_stride, xd); - } - vp9_dequant_idct_add_uv_block_8x8_inplace_c( - xd->qcoeff + 16 * 16, xd->block[16].dequant, - xd->dst.u_buffer + y_idx * 8 * xd->dst.uv_stride + x_idx * 8, - xd->dst.v_buffer + y_idx * 8 * xd->dst.uv_stride + x_idx * 8, - xd->dst.uv_stride, xd); -}; - -static void decode_4x4_sb(VP9D_COMP *pbi, MACROBLOCKD *xd, - BOOL_DECODER* const bc, int n, - int maska, int shiftb) { - int x_idx = n & maska, y_idx = n >> shiftb; - TX_TYPE tx_type = get_tx_type_4x4(xd, &xd->block[0]); - if (tx_type != DCT_DCT) { - int i; - for (i = 0; i < 16; i++) { - BLOCKD *b = &xd->block[i]; - tx_type = get_tx_type_4x4(xd, b); - if (tx_type != DCT_DCT) { - vp9_ht_dequant_idct_add_c( - tx_type, b->qcoeff, b->dequant, - xd->dst.y_buffer + (y_idx * 16 + (i / 4) * 4) * xd->dst.y_stride - + x_idx * 16 + (i & 3) * 4, - xd->dst.y_buffer + (y_idx * 16 + (i / 4) * 4) * xd->dst.y_stride - + x_idx * 16 + (i & 3) * 4, - xd->dst.y_stride, xd->dst.y_stride, xd->eobs[i]); - } else { - xd->itxm_add( - b->qcoeff, b->dequant, - xd->dst.y_buffer + (y_idx * 16 + (i / 4) * 4) * 
xd->dst.y_stride - + x_idx * 16 + (i & 3) * 4, - xd->dst.y_buffer + (y_idx * 16 + (i / 4) * 4) * xd->dst.y_stride - + x_idx * 16 + (i & 3) * 4, - xd->dst.y_stride, xd->dst.y_stride, xd->eobs[i]); - } - } - } else { - vp9_dequant_idct_add_y_block_4x4_inplace_c( - xd->qcoeff, xd->block[0].dequant, - xd->dst.y_buffer + y_idx * 16 * xd->dst.y_stride + x_idx * 16, - xd->dst.y_stride, xd); - } - vp9_dequant_idct_add_uv_block_4x4_inplace_c( - xd->qcoeff + 16 * 16, xd->block[16].dequant, - xd->dst.u_buffer + y_idx * 8 * xd->dst.uv_stride + x_idx * 8, - xd->dst.v_buffer + y_idx * 8 * xd->dst.uv_stride + x_idx * 8, - xd->dst.uv_stride, xd); -}; - static void decode_superblock64(VP9D_COMP *pbi, MACROBLOCKD *xd, int mb_row, int mb_col, BOOL_DECODER* const bc) { int n, eobtotal; - TX_SIZE tx_size = xd->mode_info_context->mbmi.txfm_size; VP9_COMMON *const pc = &pbi->common; - MODE_INFO *orig_mi = xd->mode_info_context; + MODE_INFO *mi = xd->mode_info_context; const int mis = pc->mode_info_stride; assert(xd->mode_info_context->mbmi.sb_type == BLOCK_SIZE_SB64X64); @@ -583,20 +470,7 @@ static void decode_superblock64(VP9D_COMP *pbi, MACROBLOCKD *xd, mb_init_dequantizer(pbi, xd); if (xd->mode_info_context->mbmi.mb_skip_coeff) { - int n; - - vp9_reset_mb_tokens_context(xd); - for (n = 1; n <= 3; n++) { - if (mb_col < pc->mb_cols - n) - xd->above_context += n; - if (mb_row < pc->mb_rows - n) - xd->left_context += n; - vp9_reset_mb_tokens_context(xd); - if (mb_col < pc->mb_cols - n) - xd->above_context -= n; - if (mb_row < pc->mb_rows - n) - xd->left_context -= n; - } + vp9_reset_sb64_tokens_context(xd); /* Special case: Force the loopfilter to skip when eobtotal and * mb_skip_coeff are zero. 
@@ -617,83 +491,108 @@ static void decode_superblock64(VP9D_COMP *pbi, MACROBLOCKD *xd, } /* dequantization and idct */ - if (xd->mode_info_context->mbmi.txfm_size == TX_32X32) { - for (n = 0; n < 4; n++) { - const int x_idx = n & 1, y_idx = n >> 1; + eobtotal = vp9_decode_sb64_tokens(pbi, xd, bc); + if (eobtotal == 0) { // skip loopfilter + for (n = 0; n < 16; n++) { + const int x_idx = n & 3, y_idx = n >> 2; - if (mb_col + x_idx * 2 >= pc->mb_cols || - mb_row + y_idx * 2 >= pc->mb_rows) - continue; - - xd->left_context = pc->left_context + (y_idx << 1); - xd->above_context = pc->above_context + mb_col + (x_idx << 1); - xd->mode_info_context = orig_mi + x_idx * 2 + y_idx * 2 * mis; - eobtotal = vp9_decode_sb_tokens(pbi, xd, bc); - if (eobtotal == 0) { // skip loopfilter - xd->mode_info_context->mbmi.mb_skip_coeff = 1; - if (mb_col + 1 < pc->mb_cols) - xd->mode_info_context[1].mbmi.mb_skip_coeff = 1; - if (mb_row + 1 < pc->mb_rows) { - xd->mode_info_context[mis].mbmi.mb_skip_coeff = 1; - if (mb_col + 1 < pc->mb_cols) - xd->mode_info_context[mis + 1].mbmi.mb_skip_coeff = 1; - } - } else { - vp9_dequant_idct_add_32x32(xd->sb_coeff_data.qcoeff, xd->block[0].dequant, - xd->dst.y_buffer + x_idx * 32 + - xd->dst.y_stride * y_idx * 32, - xd->dst.y_buffer + x_idx * 32 + - xd->dst.y_stride * y_idx * 32, - xd->dst.y_stride, xd->dst.y_stride, - xd->eobs[0]); - vp9_dequant_idct_add_uv_block_16x16_c(xd->sb_coeff_data.qcoeff + 1024, - xd->block[16].dequant, - xd->dst.u_buffer + x_idx * 16 + - xd->dst.uv_stride * y_idx * 16, - xd->dst.v_buffer + x_idx * 16 + - xd->dst.uv_stride * y_idx * 16, - xd->dst.uv_stride, xd); - } + if (mb_col + x_idx < pc->mb_cols && mb_row + y_idx < pc->mb_rows) + mi[y_idx * mis + x_idx].mbmi.mb_skip_coeff = mi->mbmi.mb_skip_coeff; } } else { - for (n = 0; n < 16; n++) { - int x_idx = n & 3, y_idx = n >> 2; - - if (mb_col + x_idx >= pc->mb_cols || mb_row + y_idx >= pc->mb_rows) - continue; - - xd->above_context = pc->above_context + mb_col + x_idx; - 
xd->left_context = pc->left_context + y_idx; - xd->mode_info_context = orig_mi + x_idx + y_idx * mis; - - eobtotal = vp9_decode_mb_tokens(pbi, xd, bc); - if (eobtotal == 0) { // skip loopfilter - xd->mode_info_context->mbmi.mb_skip_coeff = 1; - continue; - } - - if (tx_size == TX_16X16) { - decode_16x16_sb(pbi, xd, bc, n, 3, 2); - } else if (tx_size == TX_8X8) { - decode_8x8_sb(pbi, xd, bc, n, 3, 2); - } else { - decode_4x4_sb(pbi, xd, bc, n, 3, 2); - } + switch (xd->mode_info_context->mbmi.txfm_size) { + case TX_32X32: + for (n = 0; n < 4; n++) { + const int x_idx = n & 1, y_idx = n >> 1; + vp9_dequant_idct_add_32x32(xd->qcoeff + n * 1024, + xd->block[0].dequant, + xd->dst.y_buffer + x_idx * 32 + y_idx * xd->dst.y_stride * 32, + xd->dst.y_buffer + x_idx * 32 + y_idx * xd->dst.y_stride * 32, + xd->dst.y_stride, xd->dst.y_stride, xd->eobs[n * 64]); + } + vp9_dequant_idct_add_32x32(xd->qcoeff + 4096, + xd->block[16].dequant, xd->dst.u_buffer, xd->dst.u_buffer, + xd->dst.uv_stride, xd->dst.uv_stride, xd->eobs[256]); + vp9_dequant_idct_add_32x32(xd->qcoeff + 4096 + 1024, + xd->block[20].dequant, xd->dst.v_buffer, xd->dst.v_buffer, + xd->dst.uv_stride, xd->dst.uv_stride, xd->eobs[320]); + break; + case TX_16X16: // FIXME(rbultje): adst + for (n = 0; n < 16; n++) { + const int x_idx = n & 3, y_idx = n >> 2; + vp9_dequant_idct_add_16x16(xd->qcoeff + n * 256, xd->block[0].dequant, + xd->dst.y_buffer + y_idx * 16 * xd->dst.y_stride + x_idx * 16, + xd->dst.y_buffer + y_idx * 16 * xd->dst.y_stride + x_idx * 16, + xd->dst.y_stride, xd->dst.y_stride, xd->eobs[n * 16]); + } + for (n = 0; n < 4; n++) { + const int x_idx = n & 1, y_idx = n >> 1; + vp9_dequant_idct_add_16x16(xd->qcoeff + 4096 + n * 256, + xd->block[16].dequant, + xd->dst.u_buffer + y_idx * 16 * xd->dst.uv_stride + x_idx * 16, + xd->dst.u_buffer + y_idx * 16 * xd->dst.uv_stride + x_idx * 16, + xd->dst.uv_stride, xd->dst.uv_stride, xd->eobs[256 + n * 16]); + vp9_dequant_idct_add_16x16(xd->qcoeff + 4096 + 1024 + n * 
256, + xd->block[20].dequant, + xd->dst.v_buffer + y_idx * 16 * xd->dst.uv_stride + x_idx * 16, + xd->dst.v_buffer + y_idx * 16 * xd->dst.uv_stride + x_idx * 16, + xd->dst.uv_stride, xd->dst.uv_stride, xd->eobs[320 + n * 16]); + } + break; + case TX_8X8: // FIXME(rbultje): adst + for (n = 0; n < 64; n++) { + const int x_idx = n & 7, y_idx = n >> 3; + vp9_dequant_idct_add_8x8_c(xd->qcoeff + n * 64, xd->block[0].dequant, + xd->dst.y_buffer + y_idx * 8 * xd->dst.y_stride + x_idx * 8, + xd->dst.y_buffer + y_idx * 8 * xd->dst.y_stride + x_idx * 8, + xd->dst.y_stride, xd->dst.y_stride, xd->eobs[n * 4]); + } + for (n = 0; n < 16; n++) { + const int x_idx = n & 3, y_idx = n >> 2; + vp9_dequant_idct_add_8x8_c(xd->qcoeff + n * 64 + 4096, + xd->block[16].dequant, + xd->dst.u_buffer + y_idx * 8 * xd->dst.uv_stride + x_idx * 8, + xd->dst.u_buffer + y_idx * 8 * xd->dst.uv_stride + x_idx * 8, + xd->dst.uv_stride, xd->dst.uv_stride, xd->eobs[256 + n * 4]); + vp9_dequant_idct_add_8x8_c(xd->qcoeff + n * 64 + 4096 + 1024, + xd->block[20].dequant, + xd->dst.v_buffer + y_idx * 8 * xd->dst.uv_stride + x_idx * 8, + xd->dst.v_buffer + y_idx * 8 * xd->dst.uv_stride + x_idx * 8, + xd->dst.uv_stride, xd->dst.uv_stride, xd->eobs[320 + n * 4]); + } + break; + case TX_4X4: // FIXME(rbultje): adst + for (n = 0; n < 256; n++) { + const int x_idx = n & 15, y_idx = n >> 4; + xd->itxm_add(xd->qcoeff + n * 16, xd->block[0].dequant, + xd->dst.y_buffer + y_idx * 4 * xd->dst.y_stride + x_idx * 4, + xd->dst.y_buffer + y_idx * 4 * xd->dst.y_stride + x_idx * 4, + xd->dst.y_stride, xd->dst.y_stride, xd->eobs[n]); + } + for (n = 0; n < 64; n++) { + const int x_idx = n & 7, y_idx = n >> 3; + xd->itxm_add(xd->qcoeff + 4096 + n * 16, + xd->block[16].dequant, + xd->dst.u_buffer + y_idx * 4 * xd->dst.uv_stride + x_idx * 4, + xd->dst.u_buffer + y_idx * 4 * xd->dst.uv_stride + x_idx * 4, + xd->dst.uv_stride, xd->dst.uv_stride, xd->eobs[256 + n]); + xd->itxm_add(xd->qcoeff + 4096 + 1024 + n * 16, + 
xd->block[20].dequant, + xd->dst.v_buffer + y_idx * 4 * xd->dst.uv_stride + x_idx * 4, + xd->dst.v_buffer + y_idx * 4 * xd->dst.uv_stride + x_idx * 4, + xd->dst.uv_stride, xd->dst.uv_stride, xd->eobs[320 + n]); + } + break; + default: assert(0); } } - - xd->above_context = pc->above_context + mb_col; - xd->left_context = pc->left_context; - xd->mode_info_context = orig_mi; } static void decode_superblock32(VP9D_COMP *pbi, MACROBLOCKD *xd, int mb_row, int mb_col, BOOL_DECODER* const bc) { int n, eobtotal; - TX_SIZE tx_size = xd->mode_info_context->mbmi.txfm_size; VP9_COMMON *const pc = &pbi->common; - MODE_INFO *orig_mi = xd->mode_info_context; const int mis = pc->mode_info_stride; assert(xd->mode_info_context->mbmi.sb_type == BLOCK_SIZE_SB32X32); @@ -706,16 +605,7 @@ static void decode_superblock32(VP9D_COMP *pbi, MACROBLOCKD *xd, mb_init_dequantizer(pbi, xd); if (xd->mode_info_context->mbmi.mb_skip_coeff) { - vp9_reset_mb_tokens_context(xd); - if (mb_col < pc->mb_cols - 1) - xd->above_context++; - if (mb_row < pc->mb_rows - 1) - xd->left_context++; - vp9_reset_mb_tokens_context(xd); - if (mb_col < pc->mb_cols - 1) - xd->above_context--; - if (mb_row < pc->mb_rows - 1) - xd->left_context--; + vp9_reset_sb_tokens_context(xd); /* Special case: Force the loopfilter to skip when eobtotal and * mb_skip_coeff are zero. 
@@ -736,56 +626,90 @@ static void decode_superblock32(VP9D_COMP *pbi, MACROBLOCKD *xd, } /* dequantization and idct */ - if (xd->mode_info_context->mbmi.txfm_size == TX_32X32) { - eobtotal = vp9_decode_sb_tokens(pbi, xd, bc); - if (eobtotal == 0) { // skip loopfilter - xd->mode_info_context->mbmi.mb_skip_coeff = 1; + eobtotal = vp9_decode_sb_tokens(pbi, xd, bc); + if (eobtotal == 0) { // skip loopfilter + xd->mode_info_context->mbmi.mb_skip_coeff = 1; + if (mb_col + 1 < pc->mb_cols) + xd->mode_info_context[1].mbmi.mb_skip_coeff = 1; + if (mb_row + 1 < pc->mb_rows) { + xd->mode_info_context[mis].mbmi.mb_skip_coeff = 1; if (mb_col + 1 < pc->mb_cols) - xd->mode_info_context[1].mbmi.mb_skip_coeff = 1; - if (mb_row + 1 < pc->mb_rows) { - xd->mode_info_context[mis].mbmi.mb_skip_coeff = 1; - if (mb_col + 1 < pc->mb_cols) - xd->mode_info_context[mis + 1].mbmi.mb_skip_coeff = 1; - } - } else { - vp9_dequant_idct_add_32x32(xd->sb_coeff_data.qcoeff, xd->block[0].dequant, - xd->dst.y_buffer, xd->dst.y_buffer, - xd->dst.y_stride, xd->dst.y_stride, - xd->eobs[0]); - vp9_dequant_idct_add_uv_block_16x16_c(xd->sb_coeff_data.qcoeff + 1024, - xd->block[16].dequant, - xd->dst.u_buffer, xd->dst.v_buffer, - xd->dst.uv_stride, xd); + xd->mode_info_context[mis + 1].mbmi.mb_skip_coeff = 1; } } else { - for (n = 0; n < 4; n++) { - int x_idx = n & 1, y_idx = n >> 1; - - if (mb_col + x_idx >= pc->mb_cols || mb_row + y_idx >= pc->mb_rows) - continue; - - xd->above_context = pc->above_context + mb_col + x_idx; - xd->left_context = pc->left_context + y_idx + (mb_row & 2); - xd->mode_info_context = orig_mi + x_idx + y_idx * mis; - - eobtotal = vp9_decode_mb_tokens(pbi, xd, bc); - if (eobtotal == 0) { // skip loopfilter - xd->mode_info_context->mbmi.mb_skip_coeff = 1; - continue; - } - - if (tx_size == TX_16X16) { - decode_16x16_sb(pbi, xd, bc, n, 1, 1); - } else if (tx_size == TX_8X8) { - decode_8x8_sb(pbi, xd, bc, n, 1, 1); - } else { - decode_4x4_sb(pbi, xd, bc, n, 1, 1); - } + switch 
(xd->mode_info_context->mbmi.txfm_size) { + case TX_32X32: + vp9_dequant_idct_add_32x32(xd->qcoeff, xd->block[0].dequant, + xd->dst.y_buffer, xd->dst.y_buffer, + xd->dst.y_stride, xd->dst.y_stride, + xd->eobs[0]); + vp9_dequant_idct_add_uv_block_16x16_c(xd->qcoeff + 1024, + xd->block[16].dequant, + xd->dst.u_buffer, + xd->dst.v_buffer, + xd->dst.uv_stride, xd); + break; + case TX_16X16: // FIXME(rbultje): adst + for (n = 0; n < 4; n++) { + const int x_idx = n & 1, y_idx = n >> 1; + vp9_dequant_idct_add_16x16( + xd->qcoeff + n * 256, xd->block[0].dequant, + xd->dst.y_buffer + y_idx * 16 * xd->dst.y_stride + x_idx * 16, + xd->dst.y_buffer + y_idx * 16 * xd->dst.y_stride + x_idx * 16, + xd->dst.y_stride, xd->dst.y_stride, xd->eobs[n * 16]); + } + vp9_dequant_idct_add_uv_block_16x16_c(xd->qcoeff + 1024, + xd->block[16].dequant, + xd->dst.u_buffer, + xd->dst.v_buffer, + xd->dst.uv_stride, xd); + break; + case TX_8X8: // FIXME(rbultje): adst + for (n = 0; n < 16; n++) { + const int x_idx = n & 3, y_idx = n >> 2; + vp9_dequant_idct_add_8x8_c(xd->qcoeff + n * 64, xd->block[0].dequant, + xd->dst.y_buffer + y_idx * 8 * xd->dst.y_stride + x_idx * 8, + xd->dst.y_buffer + y_idx * 8 * xd->dst.y_stride + x_idx * 8, + xd->dst.y_stride, xd->dst.y_stride, xd->eobs[n * 4]); + } + for (n = 0; n < 4; n++) { + const int x_idx = n & 1, y_idx = n >> 1; + vp9_dequant_idct_add_8x8_c(xd->qcoeff + n * 64 + 1024, + xd->block[16].dequant, + xd->dst.u_buffer + y_idx * 8 * xd->dst.uv_stride + x_idx * 8, + xd->dst.u_buffer + y_idx * 8 * xd->dst.uv_stride + x_idx * 8, + xd->dst.uv_stride, xd->dst.uv_stride, xd->eobs[64 + n * 4]); + vp9_dequant_idct_add_8x8_c(xd->qcoeff + n * 64 + 1280, + xd->block[20].dequant, + xd->dst.v_buffer + y_idx * 8 * xd->dst.uv_stride + x_idx * 8, + xd->dst.v_buffer + y_idx * 8 * xd->dst.uv_stride + x_idx * 8, + xd->dst.uv_stride, xd->dst.uv_stride, xd->eobs[80 + n * 4]); + } + break; + case TX_4X4: // FIXME(rbultje): adst + for (n = 0; n < 64; n++) { + const int x_idx = n 
& 7, y_idx = n >> 3; + xd->itxm_add(xd->qcoeff + n * 16, xd->block[0].dequant, + xd->dst.y_buffer + y_idx * 4 * xd->dst.y_stride + x_idx * 4, + xd->dst.y_buffer + y_idx * 4 * xd->dst.y_stride + x_idx * 4, + xd->dst.y_stride, xd->dst.y_stride, xd->eobs[n]); + } + for (n = 0; n < 16; n++) { + const int x_idx = n & 3, y_idx = n >> 2; + xd->itxm_add(xd->qcoeff + 1024 + n * 16, + xd->block[16].dequant, + xd->dst.u_buffer + y_idx * 4 * xd->dst.uv_stride + x_idx * 4, + xd->dst.u_buffer + y_idx * 4 * xd->dst.uv_stride + x_idx * 4, + xd->dst.uv_stride, xd->dst.uv_stride, xd->eobs[64 + n]); + xd->itxm_add(xd->qcoeff + 1280 + n * 16, + xd->block[20].dequant, + xd->dst.v_buffer + y_idx * 4 * xd->dst.uv_stride + x_idx * 4, + xd->dst.v_buffer + y_idx * 4 * xd->dst.uv_stride + x_idx * 4, + xd->dst.uv_stride, xd->dst.uv_stride, xd->eobs[80 + n]); + } + break; + default: assert(0); } - - xd->above_context = pc->above_context + mb_col; - xd->left_context = pc->left_context + (mb_row & 2); - xd->mode_info_context = orig_mi; } } @@ -1187,7 +1111,7 @@ static void read_coef_probs(VP9D_COMP *pbi, BOOL_DECODER* const bc) { read_coef_probs_common(bc, pc->fc.coef_probs_16x16, BLOCK_TYPES); } if (pbi->common.txfm_mode > ALLOW_16X16) { - read_coef_probs_common(bc, pc->fc.coef_probs_32x32, BLOCK_TYPES_32X32); + read_coef_probs_common(bc, pc->fc.coef_probs_32x32, BLOCK_TYPES); } } diff --git a/vp9/decoder/vp9_dequantize.c b/vp9/decoder/vp9_dequantize.c index 5a98b1150..85246d830 100644 --- a/vp9/decoder/vp9_dequantize.c +++ b/vp9/decoder/vp9_dequantize.c @@ -354,7 +354,7 @@ void vp9_dequant_idct_add_uv_block_16x16_c(int16_t *q, const int16_t *dq, int stride, MACROBLOCKD *xd) { vp9_dequant_idct_add_16x16_c(q, dq, dstu, dstu, stride, stride, - xd->eobs[16]); + xd->eobs[64]); vp9_dequant_idct_add_16x16_c(q + 256, dq, dstv, dstv, stride, stride, - xd->eobs[20]); + xd->eobs[80]); } diff --git a/vp9/decoder/vp9_detokenize.c b/vp9/decoder/vp9_detokenize.c index d3fb25ace..a192266ef 100644 --- 
a/vp9/decoder/vp9_detokenize.c +++ b/vp9/decoder/vp9_detokenize.c @@ -90,9 +90,8 @@ static int decode_coefs(VP9D_COMP *dx, const MACROBLOCKD *xd, const int *const scan, TX_SIZE txfm_size) { ENTROPY_CONTEXT* const A0 = (ENTROPY_CONTEXT *) xd->above_context; ENTROPY_CONTEXT* const L0 = (ENTROPY_CONTEXT *) xd->left_context; - const int aidx = vp9_block2above[txfm_size][block_idx]; - const int lidx = vp9_block2left[txfm_size][block_idx]; - ENTROPY_CONTEXT above_ec = A0[aidx] != 0, left_ec = L0[lidx] != 0; + int aidx, lidx; + ENTROPY_CONTEXT above_ec, left_ec; FRAME_CONTEXT *const fc = &dx->common.fc; int recent_energy = 0; int pt, c = 0; @@ -101,9 +100,22 @@ static int decode_coefs(VP9D_COMP *dx, const MACROBLOCKD *xd, vp9_coeff_count *coef_counts; const int ref = xd->mode_info_context->mbmi.ref_frame != INTRA_FRAME; + if (xd->mode_info_context->mbmi.sb_type == BLOCK_SIZE_SB64X64) { + aidx = vp9_block2above_sb64[txfm_size][block_idx]; + lidx = vp9_block2left_sb64[txfm_size][block_idx]; + } else if (xd->mode_info_context->mbmi.sb_type == BLOCK_SIZE_SB32X32) { + aidx = vp9_block2above_sb[txfm_size][block_idx]; + lidx = vp9_block2left_sb[txfm_size][block_idx]; + } else { + aidx = vp9_block2above[txfm_size][block_idx]; + lidx = vp9_block2left[txfm_size][block_idx]; + } + switch (txfm_size) { default: case TX_4X4: + above_ec = A0[aidx] != 0; + left_ec = L0[lidx] != 0; coef_probs = fc->coef_probs_4x4; coef_counts = fc->coef_counts_4x4; break; @@ -240,7 +252,7 @@ SKIP_START: if (type == PLANE_TYPE_UV) { ENTROPY_CONTEXT *A1 = (ENTROPY_CONTEXT *) (xd->above_context + 1); ENTROPY_CONTEXT *L1 = (ENTROPY_CONTEXT *) (xd->left_context + 1); - A1[aidx] = A1[aidx + 1] = L1[aidx] = L1[lidx + 1] = A0[aidx]; + A1[aidx] = A1[aidx + 1] = L1[lidx] = L1[lidx + 1] = A0[aidx]; if (txfm_size >= TX_32X32) { ENTROPY_CONTEXT *A2 = (ENTROPY_CONTEXT *) (xd->above_context + 2); ENTROPY_CONTEXT *L2 = (ENTROPY_CONTEXT *) (xd->left_context + 2); @@ -272,24 +284,181 @@ int vp9_decode_sb_tokens(VP9D_COMP* 
const pbi, MACROBLOCKD* const xd, BOOL_DECODER* const bc) { const int segment_id = xd->mode_info_context->mbmi.segment_id; - int i, eobtotal = 0, seg_eob; + int i, eobtotal = 0, seg_eob, c; - // Luma block - int c = decode_coefs(pbi, xd, bc, 0, PLANE_TYPE_Y_WITH_DC, + switch (xd->mode_info_context->mbmi.txfm_size) { + case TX_32X32: + // Luma block + c = decode_coefs(pbi, xd, bc, 0, PLANE_TYPE_Y_WITH_DC, DCT_DCT, get_eob(xd, segment_id, 1024), - xd->sb_coeff_data.qcoeff, - vp9_default_zig_zag1d_32x32, TX_32X32); - xd->eobs[0] = c; - eobtotal += c; + xd->qcoeff, vp9_default_zig_zag1d_32x32, TX_32X32); + xd->eobs[0] = c; + eobtotal += c; - // 16x16 chroma blocks - seg_eob = get_eob(xd, segment_id, 256); - for (i = 16; i < 24; i += 4) { - c = decode_coefs(pbi, xd, bc, i, PLANE_TYPE_UV, DCT_DCT, seg_eob, - xd->sb_coeff_data.qcoeff + 1024 + (i - 16) * 64, - vp9_default_zig_zag1d_16x16, TX_16X16); - xd->eobs[i] = c; - eobtotal += c; + // 16x16 chroma blocks + seg_eob = get_eob(xd, segment_id, 256); + for (i = 64; i < 96; i += 16) { + c = decode_coefs(pbi, xd, bc, i, PLANE_TYPE_UV, DCT_DCT, seg_eob, + xd->qcoeff + i * 16, + vp9_default_zig_zag1d_16x16, TX_16X16); + xd->eobs[i] = c; + eobtotal += c; + } + break; + case TX_16X16: + // 16x16 luma blocks + seg_eob = get_eob(xd, segment_id, 256); + for (i = 0; i < 64; i += 16) { + c = decode_coefs(pbi, xd, bc, i, PLANE_TYPE_Y_WITH_DC, + DCT_DCT, seg_eob, xd->qcoeff + i * 16, + vp9_default_zig_zag1d_16x16, TX_16X16); + xd->eobs[i] = c; + eobtotal += c; + } + + // 16x16 chroma blocks + for (i = 64; i < 96; i += 16) { + c = decode_coefs(pbi, xd, bc, i, PLANE_TYPE_UV, DCT_DCT, seg_eob, + xd->qcoeff + i * 16, + vp9_default_zig_zag1d_16x16, TX_16X16); + xd->eobs[i] = c; + eobtotal += c; + } + break; + case TX_8X8: + // 8x8 luma blocks + seg_eob = get_eob(xd, segment_id, 64); + for (i = 0; i < 64; i += 4) { + c = decode_coefs(pbi, xd, bc, i, PLANE_TYPE_Y_WITH_DC, + DCT_DCT, seg_eob, xd->qcoeff + i * 16, + vp9_default_zig_zag1d_8x8, 
TX_8X8); + xd->eobs[i] = c; + eobtotal += c; + } + + // 8x8 chroma blocks + for (i = 64; i < 96; i += 4) { + c = decode_coefs(pbi, xd, bc, i, PLANE_TYPE_UV, DCT_DCT, seg_eob, + xd->qcoeff + i * 16, + vp9_default_zig_zag1d_8x8, TX_8X8); + xd->eobs[i] = c; + eobtotal += c; + } + break; + case TX_4X4: + // 4x4 luma blocks + seg_eob = get_eob(xd, segment_id, 16); + for (i = 0; i < 64; i++) { + c = decode_coefs(pbi, xd, bc, i, PLANE_TYPE_Y_WITH_DC, + DCT_DCT, seg_eob, xd->qcoeff + i * 16, + vp9_default_zig_zag1d_4x4, TX_4X4); + xd->eobs[i] = c; + eobtotal += c; + } + + // 4x4 chroma blocks + for (i = 64; i < 96; i++) { + c = decode_coefs(pbi, xd, bc, i, PLANE_TYPE_UV, DCT_DCT, seg_eob, + xd->qcoeff + i * 16, + vp9_default_zig_zag1d_4x4, TX_4X4); + xd->eobs[i] = c; + eobtotal += c; + } + break; + default: assert(0); + } + + return eobtotal; +} + +int vp9_decode_sb64_tokens(VP9D_COMP* const pbi, + MACROBLOCKD* const xd, + BOOL_DECODER* const bc) { + const int segment_id = xd->mode_info_context->mbmi.segment_id; + int i, eobtotal = 0, seg_eob, c; + + switch (xd->mode_info_context->mbmi.txfm_size) { + case TX_32X32: + // Luma block + seg_eob = get_eob(xd, segment_id, 1024); + for (i = 0; i < 256; i += 64) { + c = decode_coefs(pbi, xd, bc, i, PLANE_TYPE_Y_WITH_DC, + DCT_DCT, seg_eob, xd->qcoeff + i * 16, + vp9_default_zig_zag1d_32x32, TX_32X32); + xd->eobs[i] = c; + eobtotal += c; + } + + // 32x32 chroma blocks + for (i = 256; i < 384; i += 64) { + c = decode_coefs(pbi, xd, bc, i, PLANE_TYPE_UV, DCT_DCT, seg_eob, + xd->qcoeff + i * 16, + vp9_default_zig_zag1d_32x32, TX_32X32); + xd->eobs[i] = c; + eobtotal += c; + } + break; + case TX_16X16: + // 16x16 luma blocks + seg_eob = get_eob(xd, segment_id, 256); + for (i = 0; i < 256; i += 16) { + c = decode_coefs(pbi, xd, bc, i, PLANE_TYPE_Y_WITH_DC, + DCT_DCT, seg_eob, xd->qcoeff + i * 16, + vp9_default_zig_zag1d_16x16, TX_16X16); + xd->eobs[i] = c; + eobtotal += c; + } + + // 16x16 chroma blocks + for (i = 256; i < 384; i += 16) 
{ + c = decode_coefs(pbi, xd, bc, i, PLANE_TYPE_UV, DCT_DCT, seg_eob, + xd->qcoeff + i * 16, + vp9_default_zig_zag1d_16x16, TX_16X16); + xd->eobs[i] = c; + eobtotal += c; + } + break; + case TX_8X8: + // 8x8 luma blocks + seg_eob = get_eob(xd, segment_id, 64); + for (i = 0; i < 256; i += 4) { + c = decode_coefs(pbi, xd, bc, i, PLANE_TYPE_Y_WITH_DC, + DCT_DCT, seg_eob, xd->qcoeff + i * 16, + vp9_default_zig_zag1d_8x8, TX_8X8); + xd->eobs[i] = c; + eobtotal += c; + } + + // 8x8 chroma blocks + for (i = 256; i < 384; i += 4) { + c = decode_coefs(pbi, xd, bc, i, PLANE_TYPE_UV, DCT_DCT, seg_eob, + xd->qcoeff + i * 16, + vp9_default_zig_zag1d_8x8, TX_8X8); + xd->eobs[i] = c; + eobtotal += c; + } + break; + case TX_4X4: + // 4x4 luma blocks + seg_eob = get_eob(xd, segment_id, 16); + for (i = 0; i < 256; i++) { + c = decode_coefs(pbi, xd, bc, i, PLANE_TYPE_Y_WITH_DC, + DCT_DCT, seg_eob, xd->qcoeff + i * 16, + vp9_default_zig_zag1d_4x4, TX_4X4); + xd->eobs[i] = c; + eobtotal += c; + } + + // 4x4 chroma blocks + for (i = 256; i < 384; i++) { + c = decode_coefs(pbi, xd, bc, i, PLANE_TYPE_UV, DCT_DCT, seg_eob, + xd->qcoeff + i * 16, + vp9_default_zig_zag1d_4x4, TX_4X4); + xd->eobs[i] = c; + eobtotal += c; + } + break; + default: assert(0); } return eobtotal; diff --git a/vp9/decoder/vp9_detokenize.h b/vp9/decoder/vp9_detokenize.h index 926a0661f..33a34aeae 100644 --- a/vp9/decoder/vp9_detokenize.h +++ b/vp9/decoder/vp9_detokenize.h @@ -14,8 +14,6 @@ #include "vp9/decoder/vp9_onyxd_int.h" -void vp9_reset_mb_tokens_context(MACROBLOCKD* const); - int vp9_decode_coefs_4x4(VP9D_COMP *dx, MACROBLOCKD *xd, BOOL_DECODER* const bc, PLANE_TYPE type, int i); @@ -27,6 +25,10 @@ int vp9_decode_sb_tokens(VP9D_COMP* const pbi, MACROBLOCKD* const xd, BOOL_DECODER* const bc); +int vp9_decode_sb64_tokens(VP9D_COMP* const pbi, + MACROBLOCKD* const xd, + BOOL_DECODER* const bc); + int vp9_decode_mb_tokens_4x4_uv(VP9D_COMP* const dx, MACROBLOCKD* const xd, BOOL_DECODER* const bc); diff --git 
a/vp9/encoder/vp9_bitstream.c b/vp9/encoder/vp9_bitstream.c index 7101947a6..971da0509 100644 --- a/vp9/encoder/vp9_bitstream.c +++ b/vp9/encoder/vp9_bitstream.c @@ -45,7 +45,7 @@ int intra_mode_stats[VP9_KF_BINTRAMODES] vp9_coeff_stats tree_update_hist_4x4[BLOCK_TYPES]; vp9_coeff_stats tree_update_hist_8x8[BLOCK_TYPES]; vp9_coeff_stats tree_update_hist_16x16[BLOCK_TYPES]; -vp9_coeff_stats tree_update_hist_32x32[BLOCK_TYPES_32X32]; +vp9_coeff_stats tree_update_hist_32x32[BLOCK_TYPES]; extern unsigned int active_section; #endif @@ -1229,7 +1229,7 @@ static void build_coeff_contexts(VP9_COMP *cpi) { #ifdef ENTROPY_STATS cpi, context_counters_32x32, #endif - cpi->frame_branch_ct_32x32, BLOCK_TYPES_32X32); + cpi->frame_branch_ct_32x32, BLOCK_TYPES); } static void update_coef_probs_common(vp9_writer* const bc, @@ -1388,7 +1388,7 @@ static void update_coef_probs(VP9_COMP* const cpi, vp9_writer* const bc) { cpi->frame_coef_probs_32x32, cpi->common.fc.coef_probs_32x32, cpi->frame_branch_ct_32x32, - BLOCK_TYPES_32X32); + BLOCK_TYPES); } } @@ -2103,13 +2103,13 @@ void print_tree_update_probs() { fprintf(f, "\n/* Update probabilities for token entropy tree. 
*/\n\n"); print_tree_update_for_type(f, tree_update_hist_4x4, BLOCK_TYPES, - "vp9_coef_update_probs_4x4[BLOCK_TYPES_4X4]"); + "vp9_coef_update_probs_4x4[BLOCK_TYPES]"); print_tree_update_for_type(f, tree_update_hist_8x8, BLOCK_TYPES, - "vp9_coef_update_probs_8x8[BLOCK_TYPES_8X8]"); + "vp9_coef_update_probs_8x8[BLOCK_TYPES]"); print_tree_update_for_type(f, tree_update_hist_16x16, BLOCK_TYPES, - "vp9_coef_update_probs_16x16[BLOCK_TYPES_16X16]"); - print_tree_update_for_type(f, tree_update_hist_32x32, BLOCK_TYPES_32X32, - "vp9_coef_update_probs_32x32[BLOCK_TYPES_32X32]"); + "vp9_coef_update_probs_16x16[BLOCK_TYPES]"); + print_tree_update_for_type(f, tree_update_hist_32x32, BLOCK_TYPES, + "vp9_coef_update_probs_32x32[BLOCK_TYPES]"); fclose(f); f = fopen("treeupdate.bin", "wb"); diff --git a/vp9/encoder/vp9_block.h b/vp9/encoder/vp9_block.h index 79a021cfb..560c37171 100644 --- a/vp9/encoder/vp9_block.h +++ b/vp9/encoder/vp9_block.h @@ -83,20 +83,13 @@ typedef struct { int64_t txfm_rd_diff[NB_TXFM_MODES]; } PICK_MODE_CONTEXT; -typedef struct superblock { - DECLARE_ALIGNED(16, int16_t, src_diff[32*32+16*16*2]); - DECLARE_ALIGNED(16, int16_t, coeff[32*32+16*16*2]); -} SUPERBLOCK; - typedef struct macroblock MACROBLOCK; struct macroblock { - DECLARE_ALIGNED(16, int16_t, src_diff[384]); // 16x16 Y 8x8 U 8x8 V - DECLARE_ALIGNED(16, int16_t, coeff[384]); // 16x16 Y 8x8 U 8x8 V + DECLARE_ALIGNED(16, int16_t, src_diff[64*64+32*32*2]); + DECLARE_ALIGNED(16, int16_t, coeff[64*64+32*32*2]); // 16 Y blocks, 4 U blocks, 4 V blocks, BLOCK block[24]; - SUPERBLOCK sb_coeff_data; - YV12_BUFFER_CONFIG src; MACROBLOCKD e_mbd; diff --git a/vp9/encoder/vp9_encodeframe.c b/vp9/encoder/vp9_encodeframe.c index 5271a597c..3b48f46c0 100644 --- a/vp9/encoder/vp9_encodeframe.c +++ b/vp9/encoder/vp9_encodeframe.c @@ -1820,63 +1820,6 @@ static void adjust_act_zbin(VP9_COMP *cpi, MACROBLOCK *x) { #endif } -static void update_sb_skip_coeff_state(VP9_COMP *cpi, - ENTROPY_CONTEXT_PLANES ta[4], - 
ENTROPY_CONTEXT_PLANES tl[4], - TOKENEXTRA *t[4], - TOKENEXTRA **tp, - int skip[4], int output_enabled) { - MACROBLOCK *const x = &cpi->mb; - TOKENEXTRA tokens[4][16 * 25]; - int n_tokens[4], n; - - // if there were no skips, we don't need to do anything - if (!skip[0] && !skip[1] && !skip[2] && !skip[3]) - return; - - // if we don't do coeff skipping for this frame, we don't - // need to do anything here - if (!cpi->common.mb_no_coeff_skip) - return; - - // if all 4 MBs skipped coeff coding, nothing to be done - if (skip[0] && skip[1] && skip[2] && skip[3]) - return; - - // so the situation now is that we want to skip coeffs - // for some MBs, but not all, and we didn't code EOB - // coefficients for them. However, the skip flag for this - // SB will be 0 overall, so we need to insert EOBs in the - // middle of the token tree. Do so here. - n_tokens[0] = t[1] - t[0]; - n_tokens[1] = t[2] - t[1]; - n_tokens[2] = t[3] - t[2]; - n_tokens[3] = *tp - t[3]; - if (n_tokens[0]) - memcpy(tokens[0], t[0], n_tokens[0] * sizeof(*t[0])); - if (n_tokens[1]) - memcpy(tokens[1], t[1], n_tokens[1] * sizeof(*t[0])); - if (n_tokens[2]) - memcpy(tokens[2], t[2], n_tokens[2] * sizeof(*t[0])); - if (n_tokens[3]) - memcpy(tokens[3], t[3], n_tokens[3] * sizeof(*t[0])); - - // reset pointer, stuff EOBs where necessary - *tp = t[0]; - for (n = 0; n < 4; n++) { - if (skip[n]) { - x->e_mbd.above_context = &ta[n]; - x->e_mbd.left_context = &tl[n]; - vp9_stuff_mb(cpi, &x->e_mbd, tp, !output_enabled); - } else { - if (n_tokens[n]) { - memcpy(*tp, tokens[n], sizeof(*t[0]) * n_tokens[n]); - } - (*tp) += n_tokens[n]; - } - } -} - static void update_sb64_skip_coeff_state(VP9_COMP *cpi, ENTROPY_CONTEXT_PLANES ta[16], ENTROPY_CONTEXT_PLANES tl[16], @@ -1994,7 +1937,9 @@ static void encode_macroblock(VP9_COMP *cpi, TOKENEXTRA **t, VP9_COMMON *const cm = &cpi->common; MACROBLOCK *const x = &cpi->mb; MACROBLOCKD *const xd = &x->e_mbd; - MB_MODE_INFO *const mbmi = &xd->mode_info_context->mbmi; + 
MODE_INFO *mi = xd->mode_info_context; + MB_MODE_INFO *const mbmi = &mi->mbmi; + const int mis = cm->mode_info_stride; unsigned char ref_pred_flag; assert(!xd->mode_info_context->mbmi.sb_type); @@ -2190,12 +2135,11 @@ static void encode_macroblock(VP9_COMP *cpi, TOKENEXTRA **t, vp9_tokenize_mb(cpi, xd, t, !output_enabled); } else { - int mb_skip_context = - cpi->common.mb_no_coeff_skip ? - (x->e_mbd.mode_info_context - 1)->mbmi.mb_skip_coeff + - (x->e_mbd.mode_info_context - cpi->common.mode_info_stride)->mbmi.mb_skip_coeff : - 0; - if (cpi->common.mb_no_coeff_skip) { + // FIXME(rbultje): not tile-aware (mi - 1) + int mb_skip_context = cpi->common.mb_no_coeff_skip ? + (mi - 1)->mbmi.mb_skip_coeff + (mi - mis)->mbmi.mb_skip_coeff : 0; + + if (cm->mb_no_coeff_skip) { mbmi->mb_skip_coeff = 1; if (output_enabled) cpi->skip_true_count[mb_skip_context]++; @@ -2250,12 +2194,8 @@ static void encode_superblock32(VP9_COMP *cpi, TOKENEXTRA **t, int src_y_stride = x->src.y_stride, dst_y_stride = xd->dst.y_stride; int src_uv_stride = x->src.uv_stride, dst_uv_stride = xd->dst.uv_stride; unsigned char ref_pred_flag; - int n; - TOKENEXTRA *tp[4]; - int skip[4]; MODE_INFO *mi = x->e_mbd.mode_info_context; unsigned int segment_id = mi->mbmi.segment_id; - ENTROPY_CONTEXT_PLANES ta[4], tl[4]; const int mis = cm->mode_info_stride; if (cm->frame_type == KEY_FRAME) { @@ -2342,118 +2282,101 @@ static void encode_superblock32(VP9_COMP *cpi, TOKENEXTRA **t, mb_row, mb_col); } - if (xd->mode_info_context->mbmi.txfm_size == TX_32X32) { - if (!x->skip) { - vp9_subtract_sby_s_c(x->sb_coeff_data.src_diff, src, src_y_stride, - dst, dst_y_stride); - vp9_subtract_sbuv_s_c(x->sb_coeff_data.src_diff, - usrc, vsrc, src_uv_stride, - udst, vdst, dst_uv_stride); - vp9_transform_sby_32x32(x); - vp9_transform_sbuv_16x16(x); - vp9_quantize_sby_32x32(x); - vp9_quantize_sbuv_16x16(x); - // TODO(rbultje): trellis optimize - vp9_inverse_transform_sbuv_16x16(&x->e_mbd.sb_coeff_data); - 
vp9_inverse_transform_sby_32x32(&x->e_mbd.sb_coeff_data); - vp9_recon_sby_s_c(&x->e_mbd, dst); - vp9_recon_sbuv_s_c(&x->e_mbd, udst, vdst); - - vp9_tokenize_sb(cpi, &x->e_mbd, t, !output_enabled); - } else { - int mb_skip_context = - cpi->common.mb_no_coeff_skip ? - (mi - 1)->mbmi.mb_skip_coeff + - (mi - mis)->mbmi.mb_skip_coeff : - 0; - mi->mbmi.mb_skip_coeff = 1; - if (cm->mb_no_coeff_skip) { - if (output_enabled) - cpi->skip_true_count[mb_skip_context]++; - vp9_fix_contexts_sb(xd); - } else { - vp9_stuff_sb(cpi, xd, t, !output_enabled); - if (output_enabled) - cpi->skip_false_count[mb_skip_context]++; - } - } - - // copy skip flag on all mb_mode_info contexts in this SB - // if this was a skip at this txfm size - if (mb_col < cm->mb_cols - 1) - mi[1].mbmi.mb_skip_coeff = mi->mbmi.mb_skip_coeff; - if (mb_row < cm->mb_rows - 1) { - mi[mis].mbmi.mb_skip_coeff = mi->mbmi.mb_skip_coeff; - if (mb_col < cm->mb_cols - 1) - mi[mis + 1].mbmi.mb_skip_coeff = mi->mbmi.mb_skip_coeff; - } - skip[0] = skip[2] = skip[1] = skip[3] = mi->mbmi.mb_skip_coeff; - } else { - for (n = 0; n < 4; n++) { - int x_idx = n & 1, y_idx = n >> 1; - - xd->left_context = cm->left_context + y_idx + (mb_row & 2); - xd->above_context = cm->above_context + mb_col + x_idx; - memcpy(&ta[n], xd->above_context, sizeof(ta[n])); - memcpy(&tl[n], xd->left_context, sizeof(tl[n])); - tp[n] = *t; - xd->mode_info_context = mi + x_idx + y_idx * mis; - - if (!x->skip) { - vp9_subtract_mby_s_c(x->src_diff, - src + x_idx * 16 + y_idx * 16 * src_y_stride, - src_y_stride, - dst + x_idx * 16 + y_idx * 16 * dst_y_stride, - dst_y_stride); - vp9_subtract_mbuv_s_c(x->src_diff, - usrc + x_idx * 8 + y_idx * 8 * src_uv_stride, - vsrc + x_idx * 8 + y_idx * 8 * src_uv_stride, - src_uv_stride, - udst + x_idx * 8 + y_idx * 8 * dst_uv_stride, - vdst + x_idx * 8 + y_idx * 8 * dst_uv_stride, - dst_uv_stride); - vp9_fidct_mb(x); - vp9_recon_mby_s_c(&x->e_mbd, - dst + x_idx * 16 + y_idx * 16 * dst_y_stride); - 
vp9_recon_mbuv_s_c(&x->e_mbd, - udst + x_idx * 8 + y_idx * 8 * dst_uv_stride, - vdst + x_idx * 8 + y_idx * 8 * dst_uv_stride); - - vp9_tokenize_mb(cpi, &x->e_mbd, t, !output_enabled); - skip[n] = xd->mode_info_context->mbmi.mb_skip_coeff; - } else { - int mb_skip_context = cpi->common.mb_no_coeff_skip ? - (x->e_mbd.mode_info_context - 1)->mbmi.mb_skip_coeff + - (x->e_mbd.mode_info_context - mis)->mbmi.mb_skip_coeff : - 0; - xd->mode_info_context->mbmi.mb_skip_coeff = skip[n] = 1; - if (cpi->common.mb_no_coeff_skip) { - // TODO(rbultje) this should be done per-sb instead of per-mb? - if (output_enabled) - cpi->skip_true_count[mb_skip_context]++; - vp9_reset_mb_tokens_context(xd); - } else { - vp9_stuff_mb(cpi, xd, t, !output_enabled); - // TODO(rbultje) this should be done per-sb instead of per-mb? - if (output_enabled) - cpi->skip_false_count[mb_skip_context]++; + if (!x->skip) { + vp9_subtract_sby_s_c(x->src_diff, src, src_y_stride, + dst, dst_y_stride); + vp9_subtract_sbuv_s_c(x->src_diff, + usrc, vsrc, src_uv_stride, + udst, vdst, dst_uv_stride); + switch (mi->mbmi.txfm_size) { + case TX_32X32: + vp9_transform_sby_32x32(x); + vp9_transform_sbuv_16x16(x); + vp9_quantize_sby_32x32(x); + vp9_quantize_sbuv_16x16(x); + if (x->optimize) { + vp9_optimize_sby_32x32(x); + vp9_optimize_sbuv_16x16(x); } - } + vp9_inverse_transform_sby_32x32(xd); + vp9_inverse_transform_sbuv_16x16(xd); + break; + case TX_16X16: + vp9_transform_sby_16x16(x); + vp9_transform_sbuv_16x16(x); + vp9_quantize_sby_16x16(x); + vp9_quantize_sbuv_16x16(x); + if (x->optimize) { + vp9_optimize_sby_16x16(x); + vp9_optimize_sbuv_16x16(x); + } + vp9_inverse_transform_sby_16x16(xd); + vp9_inverse_transform_sbuv_16x16(xd); + break; + case TX_8X8: + vp9_transform_sby_8x8(x); + vp9_transform_sbuv_8x8(x); + vp9_quantize_sby_8x8(x); + vp9_quantize_sbuv_8x8(x); + if (x->optimize) { + vp9_optimize_sby_8x8(x); + vp9_optimize_sbuv_8x8(x); + } + vp9_inverse_transform_sby_8x8(xd); + vp9_inverse_transform_sbuv_8x8(xd); 
+ break; + case TX_4X4: + vp9_transform_sby_4x4(x); + vp9_transform_sbuv_4x4(x); + vp9_quantize_sby_4x4(x); + vp9_quantize_sbuv_4x4(x); + if (x->optimize) { + vp9_optimize_sby_4x4(x); + vp9_optimize_sbuv_4x4(x); + } + vp9_inverse_transform_sby_4x4(xd); + vp9_inverse_transform_sbuv_4x4(xd); + break; + default: assert(0); } + vp9_recon_sby_s_c(xd, dst); + vp9_recon_sbuv_s_c(xd, udst, vdst); - xd->mode_info_context = mi; - update_sb_skip_coeff_state(cpi, ta, tl, tp, t, skip, output_enabled); + vp9_tokenize_sb(cpi, xd, t, !output_enabled); + } else { + // FIXME(rbultje): not tile-aware (mi - 1) + int mb_skip_context = cm->mb_no_coeff_skip ? + (mi - 1)->mbmi.mb_skip_coeff + (mi - mis)->mbmi.mb_skip_coeff : 0; + + mi->mbmi.mb_skip_coeff = 1; + if (cm->mb_no_coeff_skip) { + if (output_enabled) + cpi->skip_true_count[mb_skip_context]++; + vp9_reset_sb_tokens_context(xd); + } else { + vp9_stuff_sb(cpi, xd, t, !output_enabled); + if (output_enabled) + cpi->skip_false_count[mb_skip_context]++; + } + } + + // copy skip flag on all mb_mode_info contexts in this SB + // if this was a skip at this txfm size + if (mb_col < cm->mb_cols - 1) + mi[1].mbmi.mb_skip_coeff = mi->mbmi.mb_skip_coeff; + if (mb_row < cm->mb_rows - 1) { + mi[mis].mbmi.mb_skip_coeff = mi->mbmi.mb_skip_coeff; + if (mb_col < cm->mb_cols - 1) + mi[mis + 1].mbmi.mb_skip_coeff = mi->mbmi.mb_skip_coeff; } if (output_enabled) { if (cm->txfm_mode == TX_MODE_SELECT && - !((cm->mb_no_coeff_skip && skip[0] && skip[1] && skip[2] && skip[3]) || + !((cm->mb_no_coeff_skip && mi->mbmi.mb_skip_coeff) || (vp9_segfeature_active(xd, segment_id, SEG_LVL_SKIP)))) { cpi->txfm_count_32x32p[mi->mbmi.txfm_size]++; } else { - TX_SIZE sz = (cm->txfm_mode == TX_MODE_SELECT) ? - TX_32X32 : - cm->txfm_mode; + TX_SIZE sz = (cm->txfm_mode == TX_MODE_SELECT) ? 
TX_32X32 : cm->txfm_mode; mi->mbmi.txfm_size = sz; if (mb_col < cm->mb_cols - 1) mi[1].mbmi.txfm_size = sz; @@ -2481,11 +2404,8 @@ static void encode_superblock64(VP9_COMP *cpi, TOKENEXTRA **t, int src_uv_stride = x->src.uv_stride, dst_uv_stride = xd->dst.uv_stride; unsigned char ref_pred_flag; int n; - TOKENEXTRA *tp[16]; - int skip[16]; MODE_INFO *mi = x->e_mbd.mode_info_context; unsigned int segment_id = mi->mbmi.segment_id; - ENTROPY_CONTEXT_PLANES ta[16], tl[16]; const int mis = cm->mode_info_stride; if (cm->frame_type == KEY_FRAME) { @@ -2571,149 +2491,99 @@ static void encode_superblock64(VP9_COMP *cpi, TOKENEXTRA **t, mb_row, mb_col); } - if (xd->mode_info_context->mbmi.txfm_size == TX_32X32) { - int n; + if (!x->skip) { + vp9_subtract_sb64y_s_c(x->src_diff, src, src_y_stride, dst, dst_y_stride); + vp9_subtract_sb64uv_s_c(x->src_diff, usrc, vsrc, src_uv_stride, + udst, vdst, dst_uv_stride); - for (n = 0; n < 4; n++) { - int x_idx = n & 1, y_idx = n >> 1; - - xd->mode_info_context = mi + x_idx * 2 + mis * y_idx * 2; - xd->left_context = cm->left_context + (y_idx << 1); - xd->above_context = cm->above_context + mb_col + (x_idx << 1); - memcpy(&ta[n * 2], xd->above_context, sizeof(*ta) * 2); - memcpy(&tl[n * 2], xd->left_context, sizeof(*tl) * 2); - tp[n] = *t; - xd->mode_info_context = mi + x_idx * 2 + y_idx * mis * 2; - if (!x->skip) { - vp9_subtract_sby_s_c(x->sb_coeff_data.src_diff, - src + x_idx * 32 + y_idx * 32 * src_y_stride, - src_y_stride, - dst + x_idx * 32 + y_idx * 32 * dst_y_stride, - dst_y_stride); - vp9_subtract_sbuv_s_c(x->sb_coeff_data.src_diff, - usrc + x_idx * 16 + y_idx * 16 * src_uv_stride, - vsrc + x_idx * 16 + y_idx * 16 * src_uv_stride, - src_uv_stride, - udst + x_idx * 16 + y_idx * 16 * dst_uv_stride, - vdst + x_idx * 16 + y_idx * 16 * dst_uv_stride, - dst_uv_stride); - vp9_transform_sby_32x32(x); - vp9_transform_sbuv_16x16(x); - vp9_quantize_sby_32x32(x); - vp9_quantize_sbuv_16x16(x); - // TODO(rbultje): trellis optimize - 
vp9_inverse_transform_sbuv_16x16(&x->e_mbd.sb_coeff_data); - vp9_inverse_transform_sby_32x32(&x->e_mbd.sb_coeff_data); - vp9_recon_sby_s_c(&x->e_mbd, - dst + 32 * x_idx + 32 * y_idx * dst_y_stride); - vp9_recon_sbuv_s_c(&x->e_mbd, - udst + x_idx * 16 + y_idx * 16 * dst_uv_stride, - vdst + x_idx * 16 + y_idx * 16 * dst_uv_stride); - - vp9_tokenize_sb(cpi, &x->e_mbd, t, !output_enabled); - } else { - int mb_skip_context = cpi->common.mb_no_coeff_skip ? - (mi - 1)->mbmi.mb_skip_coeff + - (mi - mis)->mbmi.mb_skip_coeff : 0; - xd->mode_info_context->mbmi.mb_skip_coeff = 1; - if (cm->mb_no_coeff_skip) { - if (output_enabled) - cpi->skip_true_count[mb_skip_context]++; - vp9_fix_contexts_sb(xd); - } else { - vp9_stuff_sb(cpi, xd, t, !output_enabled); - if (output_enabled) - cpi->skip_false_count[mb_skip_context]++; + switch (xd->mode_info_context->mbmi.txfm_size) { + case TX_32X32: + vp9_transform_sb64y_32x32(x); + vp9_transform_sb64uv_32x32(x); + vp9_quantize_sb64y_32x32(x); + vp9_quantize_sb64uv_32x32(x); + if (x->optimize) { + vp9_optimize_sb64y_32x32(x); + vp9_optimize_sb64uv_32x32(x); } - } - - // copy skip flag on all mb_mode_info contexts in this SB - // if this was a skip at this txfm size - if (mb_col + x_idx * 2 < cm->mb_cols - 1) - mi[mis * y_idx * 2 + x_idx * 2 + 1].mbmi.mb_skip_coeff = - mi[mis * y_idx * 2 + x_idx * 2].mbmi.mb_skip_coeff; - if (mb_row + y_idx * 2 < cm->mb_rows - 1) { - mi[mis * y_idx * 2 + x_idx * 2 + mis].mbmi.mb_skip_coeff = - mi[mis * y_idx * 2 + x_idx * 2].mbmi.mb_skip_coeff; - if (mb_col + x_idx * 2 < cm->mb_cols - 1) - mi[mis * y_idx * 2 + x_idx * 2 + mis + 1].mbmi.mb_skip_coeff = - mi[mis * y_idx * 2 + x_idx * 2].mbmi.mb_skip_coeff; - } - skip[n] = xd->mode_info_context->mbmi.mb_skip_coeff; + vp9_inverse_transform_sb64y_32x32(xd); + vp9_inverse_transform_sb64uv_32x32(xd); + break; + case TX_16X16: + vp9_transform_sb64y_16x16(x); + vp9_transform_sb64uv_16x16(x); + vp9_quantize_sb64y_16x16(x); + vp9_quantize_sb64uv_16x16(x); + if 
(x->optimize) { + vp9_optimize_sb64y_16x16(x); + vp9_optimize_sb64uv_16x16(x); + } + vp9_inverse_transform_sb64y_16x16(xd); + vp9_inverse_transform_sb64uv_16x16(xd); + break; + case TX_8X8: + vp9_transform_sb64y_8x8(x); + vp9_transform_sb64uv_8x8(x); + vp9_quantize_sb64y_8x8(x); + vp9_quantize_sb64uv_8x8(x); + if (x->optimize) { + vp9_optimize_sb64y_8x8(x); + vp9_optimize_sb64uv_8x8(x); + } + vp9_inverse_transform_sb64y_8x8(xd); + vp9_inverse_transform_sb64uv_8x8(xd); + break; + case TX_4X4: + vp9_transform_sb64y_4x4(x); + vp9_transform_sb64uv_4x4(x); + vp9_quantize_sb64y_4x4(x); + vp9_quantize_sb64uv_4x4(x); + if (x->optimize) { + vp9_optimize_sb64y_4x4(x); + vp9_optimize_sb64uv_4x4(x); + } + vp9_inverse_transform_sb64y_4x4(xd); + vp9_inverse_transform_sb64uv_4x4(xd); + break; + default: assert(0); } + vp9_recon_sb64y_s_c(xd, dst); + vp9_recon_sb64uv_s_c(&x->e_mbd, udst, vdst); + + vp9_tokenize_sb64(cpi, &x->e_mbd, t, !output_enabled); } else { - for (n = 0; n < 16; n++) { - const int x_idx = n & 3, y_idx = n >> 2; + // FIXME(rbultje): not tile-aware (mi - 1) + int mb_skip_context = cpi->common.mb_no_coeff_skip ? 
+ (mi - 1)->mbmi.mb_skip_coeff + (mi - mis)->mbmi.mb_skip_coeff : 0; - xd->left_context = cm->left_context + y_idx; - xd->above_context = cm->above_context + mb_col + x_idx; - memcpy(&ta[n], xd->above_context, sizeof(ta[n])); - memcpy(&tl[n], xd->left_context, sizeof(tl[n])); - tp[n] = *t; - xd->mode_info_context = mi + x_idx + y_idx * mis; - - if (!x->skip) { - vp9_subtract_mby_s_c(x->src_diff, - src + x_idx * 16 + y_idx * 16 * src_y_stride, - src_y_stride, - dst + x_idx * 16 + y_idx * 16 * dst_y_stride, - dst_y_stride); - vp9_subtract_mbuv_s_c(x->src_diff, - usrc + x_idx * 8 + y_idx * 8 * src_uv_stride, - vsrc + x_idx * 8 + y_idx * 8 * src_uv_stride, - src_uv_stride, - udst + x_idx * 8 + y_idx * 8 * dst_uv_stride, - vdst + x_idx * 8 + y_idx * 8 * dst_uv_stride, - dst_uv_stride); - vp9_fidct_mb(x); - vp9_recon_mby_s_c(&x->e_mbd, - dst + x_idx * 16 + y_idx * 16 * dst_y_stride); - vp9_recon_mbuv_s_c(&x->e_mbd, - udst + x_idx * 8 + y_idx * 8 * dst_uv_stride, - vdst + x_idx * 8 + y_idx * 8 * dst_uv_stride); - - vp9_tokenize_mb(cpi, &x->e_mbd, t, !output_enabled); - skip[n] = xd->mode_info_context->mbmi.mb_skip_coeff; - } else { - int mb_skip_context = cpi->common.mb_no_coeff_skip ? - (x->e_mbd.mode_info_context - 1)->mbmi.mb_skip_coeff + - (x->e_mbd.mode_info_context - mis)->mbmi.mb_skip_coeff : 0; - xd->mode_info_context->mbmi.mb_skip_coeff = skip[n] = 1; - if (cpi->common.mb_no_coeff_skip) { - // TODO(rbultje) this should be done per-sb instead of per-mb? - if (output_enabled) - cpi->skip_true_count[mb_skip_context]++; - vp9_reset_mb_tokens_context(xd); - } else { - vp9_stuff_mb(cpi, xd, t, !output_enabled); - // TODO(rbultje) this should be done per-sb instead of per-mb? 
- if (output_enabled) - cpi->skip_false_count[mb_skip_context]++; - } - } + xd->mode_info_context->mbmi.mb_skip_coeff = 1; + if (cm->mb_no_coeff_skip) { + if (output_enabled) + cpi->skip_true_count[mb_skip_context]++; + vp9_reset_sb64_tokens_context(xd); + } else { + vp9_stuff_sb64(cpi, xd, t, !output_enabled); + if (output_enabled) + cpi->skip_false_count[mb_skip_context]++; } } - xd->mode_info_context = mi; - update_sb64_skip_coeff_state(cpi, ta, tl, tp, t, skip, output_enabled); + // copy skip flag on all mb_mode_info contexts in this SB + // if this was a skip at this txfm size + for (n = 1; n < 16; n++) { + const int x_idx = n & 3, y_idx = n >> 2; + if (mb_col + x_idx < cm->mb_cols && mb_row + y_idx < cm->mb_rows) + mi[x_idx + y_idx * mis].mbmi.mb_skip_coeff = mi->mbmi.mb_skip_coeff; + } if (output_enabled) { if (cm->txfm_mode == TX_MODE_SELECT && - !((cm->mb_no_coeff_skip && - ((mi->mbmi.txfm_size == TX_32X32 && - skip[0] && skip[1] && skip[2] && skip[3]) || - (mi->mbmi.txfm_size != TX_32X32 && - skip[0] && skip[1] && skip[2] && skip[3] && - skip[4] && skip[5] && skip[6] && skip[7] && - skip[8] && skip[9] && skip[10] && skip[11] && - skip[12] && skip[13] && skip[14] && skip[15]))) || + !((cm->mb_no_coeff_skip && mi->mbmi.mb_skip_coeff) || (vp9_segfeature_active(xd, segment_id, SEG_LVL_SKIP)))) { cpi->txfm_count_32x32p[mi->mbmi.txfm_size]++; } else { int x, y; - TX_SIZE sz = (cm->txfm_mode == TX_MODE_SELECT) ? - TX_32X32 : - cm->txfm_mode; + TX_SIZE sz = (cm->txfm_mode == TX_MODE_SELECT) ? 
TX_32X32 : cm->txfm_mode; for (y = 0; y < 4; y++) { for (x = 0; x < 4; x++) { if (mb_col + x < cm->mb_cols && mb_row + y < cm->mb_rows) { diff --git a/vp9/encoder/vp9_encodeintra.c b/vp9/encoder/vp9_encodeintra.c index be9c224b3..75c8ea8f3 100644 --- a/vp9/encoder/vp9_encodeintra.c +++ b/vp9/encoder/vp9_encodeintra.c @@ -58,7 +58,8 @@ void vp9_encode_intra4x4block(MACROBLOCK *x, int ib) { } else { x->fwd_txm4x4(be->src_diff, be->coeff, 32); x->quantize_b_4x4(x, ib); - vp9_inverse_transform_b_4x4(&x->e_mbd, ib, 32); + vp9_inverse_transform_b_4x4(&x->e_mbd, x->e_mbd.eobs[ib], + b->dqcoeff, b->diff, 32); } vp9_recon_b(b->predictor, b->diff, *(b->base_dst) + b->dst, b->dst_stride); @@ -174,13 +175,16 @@ void vp9_encode_intra8x8(MACROBLOCK *x, int ib) { } else if (!(i & 1) && get_tx_type_4x4(xd, b + 1) == DCT_DCT) { x->fwd_txm8x4(be->src_diff, be->coeff, 32); x->quantize_b_4x4_pair(x, ib + iblock[i], ib + iblock[i] + 1); - vp9_inverse_transform_b_4x4(xd, ib + iblock[i], 32); - vp9_inverse_transform_b_4x4(xd, ib + iblock[i] + 1, 32); + vp9_inverse_transform_b_4x4(xd, xd->eobs[ib + iblock[i]], + b->dqcoeff, b->diff, 32); + vp9_inverse_transform_b_4x4(xd, xd->eobs[ib + iblock[i] + 1], + (b + 1)->dqcoeff, (b + 1)->diff, 32); i++; } else { x->fwd_txm4x4(be->src_diff, be->coeff, 32); x->quantize_b_4x4(x, ib + iblock[i]); - vp9_inverse_transform_b_4x4(xd, ib + iblock[i], 32); + vp9_inverse_transform_b_4x4(xd, xd->eobs[ib + iblock[i]], + b->dqcoeff, b->diff, 32); } } } @@ -210,7 +214,8 @@ static void encode_intra_uv4x4(MACROBLOCK *x, int ib, int mode) { x->fwd_txm4x4(be->src_diff, be->coeff, 16); x->quantize_b_4x4(x, ib); - vp9_inverse_transform_b_4x4(&x->e_mbd, ib, 16); + vp9_inverse_transform_b_4x4(&x->e_mbd, x->e_mbd.eobs[ib], + b->dqcoeff, b->diff, 16); vp9_recon_uv_b_c(b->predictor, b->diff, *(b->base_dst) + b->dst, b->dst_stride); diff --git a/vp9/encoder/vp9_encodemb.c b/vp9/encoder/vp9_encodemb.c index 62f1a2a30..b2ee800cd 100644 --- a/vp9/encoder/vp9_encodemb.c +++ 
b/vp9/encoder/vp9_encodemb.c @@ -146,6 +146,50 @@ void vp9_subtract_sbuv_s_c(int16_t *diff, const uint8_t *usrc, } } +void vp9_subtract_sb64y_s_c(int16_t *diff, const uint8_t *src, int src_stride, + const uint8_t *pred, int dst_stride) { + int r, c; + + for (r = 0; r < 64; r++) { + for (c = 0; c < 64; c++) { + diff[c] = src[c] - pred[c]; + } + + diff += 64; + pred += dst_stride; + src += src_stride; + } +} + +void vp9_subtract_sb64uv_s_c(int16_t *diff, const uint8_t *usrc, + const uint8_t *vsrc, int src_stride, + const uint8_t *upred, + const uint8_t *vpred, int dst_stride) { + int16_t *udiff = diff + 4096; + int16_t *vdiff = diff + 4096 + 1024; + int r, c; + + for (r = 0; r < 32; r++) { + for (c = 0; c < 32; c++) { + udiff[c] = usrc[c] - upred[c]; + } + + udiff += 32; + upred += dst_stride; + usrc += src_stride; + } + + for (r = 0; r < 32; r++) { + for (c = 0; c < 32; c++) { + vdiff[c] = vsrc[c] - vpred[c]; + } + + vdiff += 32; + vpred += dst_stride; + vsrc += src_stride; + } +} + void vp9_subtract_mby_c(int16_t *diff, uint8_t *src, uint8_t *pred, int stride) { vp9_subtract_mby_s_c(diff, src, stride, pred, 16); @@ -245,15 +289,168 @@ void vp9_transform_mb_16x16(MACROBLOCK *x) { } void vp9_transform_sby_32x32(MACROBLOCK *x) { - SUPERBLOCK * const x_sb = &x->sb_coeff_data; - vp9_short_fdct32x32(x_sb->src_diff, x_sb->coeff, 64); + vp9_short_fdct32x32(x->src_diff, x->coeff, 64); +} + +void vp9_transform_sby_16x16(MACROBLOCK *x) { + int n; + + for (n = 0; n < 4; n++) { + const int x_idx = n & 1, y_idx = n >> 1; + + x->fwd_txm16x16(x->src_diff + y_idx * 32 * 16 + x_idx * 16, + x->coeff + n * 256, 64); + } +} + +void vp9_transform_sby_8x8(MACROBLOCK *x) { + int n; + + for (n = 0; n < 16; n++) { + const int x_idx = n & 3, y_idx = n >> 2; + + x->fwd_txm8x8(x->src_diff + y_idx * 32 * 8 + x_idx * 8, + x->coeff + n * 64, 64); + } +} + +void vp9_transform_sby_4x4(MACROBLOCK *x) { + int n; + + for (n = 0; n < 64; n++) { + const int x_idx = n & 7, y_idx = n >> 3; + + 
x->fwd_txm4x4(x->src_diff + y_idx * 32 * 4 + x_idx * 4, + x->coeff + n * 16, 64); + } } void vp9_transform_sbuv_16x16(MACROBLOCK *x) { - SUPERBLOCK * const x_sb = &x->sb_coeff_data; vp9_clear_system_state(); - x->fwd_txm16x16(x_sb->src_diff + 1024, x_sb->coeff + 1024, 32); - x->fwd_txm16x16(x_sb->src_diff + 1280, x_sb->coeff + 1280, 32); + x->fwd_txm16x16(x->src_diff + 1024, x->coeff + 1024, 32); + x->fwd_txm16x16(x->src_diff + 1280, x->coeff + 1280, 32); +} + +void vp9_transform_sbuv_8x8(MACROBLOCK *x) { + int n; + + vp9_clear_system_state(); + for (n = 0; n < 4; n++) { + const int x_idx = n & 1, y_idx = n >> 1; + + x->fwd_txm8x8(x->src_diff + 1024 + y_idx * 16 * 8 + x_idx * 8, + x->coeff + 1024 + n * 64, 32); + x->fwd_txm8x8(x->src_diff + 1280 + y_idx * 16 * 8 + x_idx * 8, + x->coeff + 1280 + n * 64, 32); + } +} + +void vp9_transform_sbuv_4x4(MACROBLOCK *x) { + int n; + + vp9_clear_system_state(); + for (n = 0; n < 16; n++) { + const int x_idx = n & 3, y_idx = n >> 2; + + x->fwd_txm4x4(x->src_diff + 1024 + y_idx * 16 * 4 + x_idx * 4, + x->coeff + 1024 + n * 16, 32); + x->fwd_txm4x4(x->src_diff + 1280 + y_idx * 16 * 4 + x_idx * 4, + x->coeff + 1280 + n * 16, 32); + } +} + +void vp9_transform_sb64y_32x32(MACROBLOCK *x) { + int n; + + for (n = 0; n < 4; n++) { + const int x_idx = n & 1, y_idx = n >> 1; + + vp9_short_fdct32x32(x->src_diff + y_idx * 64 * 32 + x_idx * 32, + x->coeff + n * 1024, 128); + } +} + +void vp9_transform_sb64y_16x16(MACROBLOCK *x) { + int n; + + for (n = 0; n < 16; n++) { + const int x_idx = n & 3, y_idx = n >> 2; + + x->fwd_txm16x16(x->src_diff + y_idx * 64 * 16 + x_idx * 16, + x->coeff + n * 256, 128); + } +} + +void vp9_transform_sb64y_8x8(MACROBLOCK *x) { + int n; + + for (n = 0; n < 64; n++) { + const int x_idx = n & 7, y_idx = n >> 3; + + x->fwd_txm8x8(x->src_diff + y_idx * 64 * 8 + x_idx * 8, + x->coeff + n * 64, 128); + } +} + +void vp9_transform_sb64y_4x4(MACROBLOCK *x) { + int n; + + for (n = 0; n < 256; n++) { + const int x_idx = n & 
15, y_idx = n >> 4; + + x->fwd_txm4x4(x->src_diff + y_idx * 64 * 4 + x_idx * 4, + x->coeff + n * 16, 128); + } +} + +void vp9_transform_sb64uv_32x32(MACROBLOCK *x) { + vp9_clear_system_state(); + vp9_short_fdct32x32(x->src_diff + 4096, + x->coeff + 4096, 64); + vp9_short_fdct32x32(x->src_diff + 4096 + 1024, + x->coeff + 4096 + 1024, 64); +} + +void vp9_transform_sb64uv_16x16(MACROBLOCK *x) { + int n; + + vp9_clear_system_state(); + for (n = 0; n < 4; n++) { + const int x_idx = n & 1, y_idx = n >> 1; + + x->fwd_txm16x16(x->src_diff + 4096 + y_idx * 32 * 16 + x_idx * 16, + x->coeff + 4096 + n * 256, 64); + x->fwd_txm16x16(x->src_diff + 4096 + 1024 + y_idx * 32 * 16 + x_idx * 16, + x->coeff + 4096 + 1024 + n * 256, 64); + } +} + +void vp9_transform_sb64uv_8x8(MACROBLOCK *x) { + int n; + + vp9_clear_system_state(); + for (n = 0; n < 16; n++) { + const int x_idx = n & 3, y_idx = n >> 2; + + x->fwd_txm8x8(x->src_diff + 4096 + y_idx * 32 * 8 + x_idx * 8, + x->coeff + 4096 + n * 64, 64); + x->fwd_txm8x8(x->src_diff + 4096 + 1024 + y_idx * 32 * 8 + x_idx * 8, + x->coeff + 4096 + 1024 + n * 64, 64); + } +} + +void vp9_transform_sb64uv_4x4(MACROBLOCK *x) { + int n; + + vp9_clear_system_state(); + for (n = 0; n < 64; n++) { + const int x_idx = n & 7, y_idx = n >> 3; + + x->fwd_txm4x4(x->src_diff + 4096 + y_idx * 32 * 4 + x_idx * 4, + x->coeff + 4096 + n * 16, 64); + x->fwd_txm4x4(x->src_diff + 4096 + 1024 + y_idx * 32 * 4 + x_idx * 4, + x->coeff + 4096 + 1024 + n * 16, 64); + } } #define RDTRUNC(RM,DM,R,D) ( (128+(R)*(RM)) & 0xFF ) @@ -294,55 +491,35 @@ static int trellis_get_coeff_context(int token) { return vp9_get_coef_context(&recent_energy, token); } -static void optimize_b(MACROBLOCK *mb, int i, PLANE_TYPE type, +static void optimize_b(MACROBLOCK *mb, int ib, PLANE_TYPE type, + const int16_t *dequant_ptr, ENTROPY_CONTEXT *a, ENTROPY_CONTEXT *l, int tx_size) { const int ref = mb->e_mbd.mode_info_context->mbmi.ref_frame != INTRA_FRAME; MACROBLOCKD *const xd = &mb->e_mbd; - 
BLOCK *b = &mb->block[i]; - BLOCKD *d = &xd->block[i]; - vp9_token_state tokens[257][2]; - unsigned best_index[257][2]; - const int16_t *dequant_ptr = d->dequant, *coeff_ptr = b->coeff; - int16_t *qcoeff_ptr = d->qcoeff; - int16_t *dqcoeff_ptr = d->dqcoeff; - int eob = xd->eobs[i], final_eob, sz = 0; + vp9_token_state tokens[1025][2]; + unsigned best_index[1025][2]; + const int16_t *coeff_ptr = mb->coeff + ib * 16; + int16_t *qcoeff_ptr = xd->qcoeff + ib * 16; + int16_t *dqcoeff_ptr = xd->dqcoeff + ib * 16; + int eob = xd->eobs[ib], final_eob, sz = 0; const int i0 = 0; - int rc, x, next; + int rc, x, next, i; int64_t rdmult, rddiv, rd_cost0, rd_cost1; int rate0, rate1, error0, error1, t0, t1; int best, band, pt; int err_mult = plane_rd_mult[type]; int default_eob; int const *scan; + const int mul = 1 + (tx_size == TX_32X32); switch (tx_size) { default: case TX_4X4: - scan = vp9_default_zig_zag1d_4x4; default_eob = 16; - // TODO: this isn't called (for intra4x4 modes), but will be left in - // since it could be used later - { - TX_TYPE tx_type = get_tx_type_4x4(&mb->e_mbd, d); - if (tx_type != DCT_DCT) { - switch (tx_type) { - case ADST_DCT: - scan = vp9_row_scan_4x4; - break; - - case DCT_ADST: - scan = vp9_col_scan_4x4; - break; - - default: - scan = vp9_default_zig_zag1d_4x4; - break; - } - } else { - scan = vp9_default_zig_zag1d_4x4; - } - } + // FIXME(rbultje): although optimize_b currently isn't called for + // intra4x4, this should be changed to be adst-compatible + scan = vp9_default_zig_zag1d_4x4; break; case TX_8X8: scan = vp9_default_zig_zag1d_8x8; @@ -352,6 +529,10 @@ static void optimize_b(MACROBLOCK *mb, int i, PLANE_TYPE type, scan = vp9_default_zig_zag1d_16x16; default_eob = 256; break; + case TX_32X32: + scan = vp9_default_zig_zag1d_32x32; + default_eob = 1024; + break; } /* Now set up a Viterbi trellis to evaluate alternative roundings. */ @@ -395,7 +576,7 @@ static void optimize_b(MACROBLOCK *mb, int i, PLANE_TYPE type, /* And pick the best. 
*/ best = rd_cost1 < rd_cost0; base_bits = *(vp9_dct_value_cost_ptr + x); - dx = dqcoeff_ptr[rc] - coeff_ptr[rc]; + dx = mul * (dqcoeff_ptr[rc] - coeff_ptr[rc]); d2 = dx * dx; tokens[i][0].rate = base_bits + (best ? rate1 : rate0); tokens[i][0].error = d2 + (best ? error1 : error0); @@ -407,8 +588,9 @@ static void optimize_b(MACROBLOCK *mb, int i, PLANE_TYPE type, rate0 = tokens[next][0].rate; rate1 = tokens[next][1].rate; - if ((abs(x)*dequant_ptr[rc != 0] > abs(coeff_ptr[rc])) && - (abs(x)*dequant_ptr[rc != 0] < abs(coeff_ptr[rc]) + dequant_ptr[rc != 0])) + if ((abs(x)*dequant_ptr[rc != 0] > abs(coeff_ptr[rc]) * mul) && + (abs(x)*dequant_ptr[rc != 0] < abs(coeff_ptr[rc]) * mul + + dequant_ptr[rc != 0])) shortcut = 1; else shortcut = 0; @@ -504,14 +686,14 @@ static void optimize_b(MACROBLOCK *mb, int i, PLANE_TYPE type, final_eob = i; rc = scan[i]; qcoeff_ptr[rc] = x; - dqcoeff_ptr[rc] = (x * dequant_ptr[rc != 0]); + dqcoeff_ptr[rc] = (x * dequant_ptr[rc != 0]) / mul; next = tokens[i][best].next; best = best_index[i][best]; } final_eob++; - xd->eobs[d - xd->block] = final_eob; + xd->eobs[ib] = final_eob; *a = *l = (final_eob > 0); } @@ -531,7 +713,7 @@ void vp9_optimize_mby_4x4(MACROBLOCK *x) { tl = (ENTROPY_CONTEXT *)&t_left; for (b = 0; b < 16; b++) { - optimize_b(x, b, PLANE_TYPE_Y_WITH_DC, + optimize_b(x, b, PLANE_TYPE_Y_WITH_DC, x->e_mbd.block[b].dequant, ta + vp9_block2above[TX_4X4][b], tl + vp9_block2left[TX_4X4][b], TX_4X4); } @@ -553,7 +735,7 @@ void vp9_optimize_mbuv_4x4(MACROBLOCK *x) { tl = (ENTROPY_CONTEXT *)&t_left; for (b = 16; b < 24; b++) { - optimize_b(x, b, PLANE_TYPE_UV, + optimize_b(x, b, PLANE_TYPE_UV, x->e_mbd.block[b].dequant, ta + vp9_block2above[TX_4X4][b], tl + vp9_block2left[TX_4X4][b], TX_4X4); } @@ -583,7 +765,8 @@ void vp9_optimize_mby_8x8(MACROBLOCK *x) { ENTROPY_CONTEXT *const l = tl + vp9_block2left[TX_8X8][b]; ENTROPY_CONTEXT above_ec = (a[0] + a[1]) != 0; ENTROPY_CONTEXT left_ec = (l[0] + l[1]) != 0; - optimize_b(x, b, 
PLANE_TYPE_Y_WITH_DC, &above_ec, &left_ec, TX_8X8); + optimize_b(x, b, PLANE_TYPE_Y_WITH_DC, x->e_mbd.block[b].dequant, + &above_ec, &left_ec, TX_8X8); a[1] = a[0] = above_ec; l[1] = l[0] = left_ec; } @@ -602,7 +785,8 @@ void vp9_optimize_mbuv_8x8(MACROBLOCK *x) { ENTROPY_CONTEXT *const l = tl + vp9_block2left[TX_8X8][b]; ENTROPY_CONTEXT above_ec = (a[0] + a[1]) != 0; ENTROPY_CONTEXT left_ec = (l[0] + l[1]) != 0; - optimize_b(x, b, PLANE_TYPE_UV, &above_ec, &left_ec, TX_8X8); + optimize_b(x, b, PLANE_TYPE_UV, x->e_mbd.block[b].dequant, + &above_ec, &left_ec, TX_8X8); } } @@ -621,7 +805,8 @@ void vp9_optimize_mby_16x16(MACROBLOCK *x) { ta = (t_above->y1[0] + t_above->y1[1] + t_above->y1[2] + t_above->y1[3]) != 0; tl = (t_left->y1[0] + t_left->y1[1] + t_left->y1[2] + t_left->y1[3]) != 0; - optimize_b(x, 0, PLANE_TYPE_Y_WITH_DC, &ta, &tl, TX_16X16); + optimize_b(x, 0, PLANE_TYPE_Y_WITH_DC, x->e_mbd.block[0].dequant, + &ta, &tl, TX_16X16); } static void optimize_mb_16x16(MACROBLOCK *x) { @@ -629,6 +814,333 @@ static void optimize_mb_16x16(MACROBLOCK *x) { vp9_optimize_mbuv_8x8(x); } +void vp9_optimize_sby_32x32(MACROBLOCK *x) { + ENTROPY_CONTEXT *a = (ENTROPY_CONTEXT *) x->e_mbd.above_context; + ENTROPY_CONTEXT *a1 = (ENTROPY_CONTEXT *) (x->e_mbd.above_context + 1); + ENTROPY_CONTEXT *l = (ENTROPY_CONTEXT *) x->e_mbd.left_context; + ENTROPY_CONTEXT *l1 = (ENTROPY_CONTEXT *) (x->e_mbd.left_context + 1); + ENTROPY_CONTEXT ta, tl; + + ta = (a[0] + a[1] + a[2] + a[3] + a1[0] + a1[1] + a1[2] + a1[3]) != 0; + tl = (l[0] + l[1] + l[2] + l[3] + l1[0] + l1[1] + l1[2] + l1[3]) != 0; + optimize_b(x, 0, PLANE_TYPE_Y_WITH_DC, x->e_mbd.block[0].dequant, + &ta, &tl, TX_32X32); +} + +void vp9_optimize_sby_16x16(MACROBLOCK *x) { + ENTROPY_CONTEXT *a = (ENTROPY_CONTEXT *) x->e_mbd.above_context; + ENTROPY_CONTEXT *a1 = (ENTROPY_CONTEXT *) (x->e_mbd.above_context + 1); + ENTROPY_CONTEXT *l = (ENTROPY_CONTEXT *) x->e_mbd.left_context; + ENTROPY_CONTEXT *l1 = (ENTROPY_CONTEXT *) 
(x->e_mbd.left_context + 1); + ENTROPY_CONTEXT ta[2], tl[2]; + int n; + + ta[0] = (a[0] + a[1] + a[2] + a[3]) != 0; + ta[1] = (a1[0] + a1[1] + a1[2] + a1[3]) != 0; + tl[0] = (l[0] + l[1] + l[2] + l[3]) != 0; + tl[1] = (l1[0] + l1[1] + l1[2] + l1[3]) != 0; + for (n = 0; n < 4; n++) { + const int x_idx = n & 1, y_idx = n >> 1; + + optimize_b(x, n * 16, PLANE_TYPE_Y_WITH_DC, x->e_mbd.block[0].dequant, + ta + x_idx, tl + y_idx, TX_16X16); + } +} + +void vp9_optimize_sby_8x8(MACROBLOCK *x) { + ENTROPY_CONTEXT *a = (ENTROPY_CONTEXT *) x->e_mbd.above_context; + ENTROPY_CONTEXT *a1 = (ENTROPY_CONTEXT *) (x->e_mbd.above_context + 1); + ENTROPY_CONTEXT *l = (ENTROPY_CONTEXT *) x->e_mbd.left_context; + ENTROPY_CONTEXT *l1 = (ENTROPY_CONTEXT *) (x->e_mbd.left_context + 1); + ENTROPY_CONTEXT ta[4], tl[4]; + int n; + + ta[0] = (a[0] + a[1]) != 0; + ta[1] = (a[2] + a[3]) != 0; + ta[2] = (a1[0] + a1[1]) != 0; + ta[3] = (a1[2] + a1[3]) != 0; + tl[0] = (l[0] + l[1]) != 0; + tl[1] = (l[2] + l[3]) != 0; + tl[2] = (l1[0] + l1[1]) != 0; + tl[3] = (l1[2] + l1[3]) != 0; + for (n = 0; n < 16; n++) { + const int x_idx = n & 3, y_idx = n >> 2; + + optimize_b(x, n * 4, PLANE_TYPE_Y_WITH_DC, x->e_mbd.block[0].dequant, + ta + x_idx, tl + y_idx, TX_8X8); + } +} + +void vp9_optimize_sby_4x4(MACROBLOCK *x) { + ENTROPY_CONTEXT ta[8], tl[8]; + int n; + + vpx_memcpy(ta, x->e_mbd.above_context, 4 * sizeof(ENTROPY_CONTEXT)); + vpx_memcpy(ta + 4, x->e_mbd.above_context + 1, 4 * sizeof(ENTROPY_CONTEXT)); + vpx_memcpy(tl, x->e_mbd.left_context, 4 * sizeof(ENTROPY_CONTEXT)); + vpx_memcpy(tl + 4, x->e_mbd.left_context + 1, 4 * sizeof(ENTROPY_CONTEXT)); + for (n = 0; n < 64; n++) { + const int x_idx = n & 7, y_idx = n >> 3; + + optimize_b(x, n, PLANE_TYPE_Y_WITH_DC, x->e_mbd.block[0].dequant, + ta + x_idx, tl + y_idx, TX_4X4); + } +} + +void vp9_optimize_sbuv_16x16(MACROBLOCK *x) { + ENTROPY_CONTEXT *ta = (ENTROPY_CONTEXT *) x->e_mbd.above_context; + ENTROPY_CONTEXT *tl = (ENTROPY_CONTEXT *) 
x->e_mbd.left_context; + ENTROPY_CONTEXT *a, *l, *a1, *l1, above_ec, left_ec; + int b; + + for (b = 64; b < 96; b += 16) { + const int cidx = b >= 80 ? 20 : 16; + a = ta + vp9_block2above_sb[TX_16X16][b]; + l = tl + vp9_block2left_sb[TX_16X16][b]; + a1 = a + sizeof(ENTROPY_CONTEXT_PLANES) / sizeof(ENTROPY_CONTEXT); + l1 = l + sizeof(ENTROPY_CONTEXT_PLANES) / sizeof(ENTROPY_CONTEXT); + above_ec = (a[0] + a[1] + a1[0] + a1[1]) != 0; + left_ec = (l[0] + l[1] + l1[0] + l1[1]) != 0; + optimize_b(x, b, PLANE_TYPE_UV, x->e_mbd.block[cidx].dequant, + &above_ec, &left_ec, TX_16X16); + } +} + +void vp9_optimize_sbuv_8x8(MACROBLOCK *x) { + ENTROPY_CONTEXT_PLANES t_above[2], t_left[2]; + ENTROPY_CONTEXT *ta = (ENTROPY_CONTEXT *) t_above; + ENTROPY_CONTEXT *tl = (ENTROPY_CONTEXT *) t_left; + ENTROPY_CONTEXT *a, *l, above_ec, left_ec; + int b; + + vpx_memcpy(t_above, x->e_mbd.above_context, sizeof(t_above)); + vpx_memcpy(t_left, x->e_mbd.left_context, sizeof(t_left)); + for (b = 64; b < 96; b += 4) { + const int cidx = b >= 80 ? 20 : 16; + a = ta + vp9_block2above_sb[TX_8X8][b]; + l = tl + vp9_block2left_sb[TX_8X8][b]; + above_ec = (a[0] + a[1]) != 0; + left_ec = (l[0] + l[1]) != 0; + optimize_b(x, b, PLANE_TYPE_UV, x->e_mbd.block[cidx].dequant, + &above_ec, &left_ec, TX_8X8); + a[0] = a[1] = above_ec; + l[0] = l[1] = left_ec; + } +} + +void vp9_optimize_sbuv_4x4(MACROBLOCK *x) { + ENTROPY_CONTEXT_PLANES t_above[2], t_left[2]; + ENTROPY_CONTEXT *ta = (ENTROPY_CONTEXT *) t_above; + ENTROPY_CONTEXT *tl = (ENTROPY_CONTEXT *) t_left; + ENTROPY_CONTEXT *a, *l; + int b; + + vpx_memcpy(t_above, x->e_mbd.above_context, sizeof(t_above)); + vpx_memcpy(t_left, x->e_mbd.left_context, sizeof(t_left)); + for (b = 64; b < 96; b++) { + const int cidx = b >= 80 ? 
20 : 16; + a = ta + vp9_block2above_sb[TX_4X4][b]; + l = tl + vp9_block2left_sb[TX_4X4][b]; + optimize_b(x, b, PLANE_TYPE_UV, x->e_mbd.block[cidx].dequant, + a, l, TX_4X4); + } +} + +void vp9_optimize_sb64y_32x32(MACROBLOCK *x) { + ENTROPY_CONTEXT *a = (ENTROPY_CONTEXT *) x->e_mbd.above_context; + ENTROPY_CONTEXT *a1 = (ENTROPY_CONTEXT *) (x->e_mbd.above_context + 1); + ENTROPY_CONTEXT *a2 = (ENTROPY_CONTEXT *) (x->e_mbd.above_context + 2); + ENTROPY_CONTEXT *a3 = (ENTROPY_CONTEXT *) (x->e_mbd.above_context + 3); + ENTROPY_CONTEXT *l = (ENTROPY_CONTEXT *) x->e_mbd.left_context; + ENTROPY_CONTEXT *l1 = (ENTROPY_CONTEXT *) (x->e_mbd.left_context + 1); + ENTROPY_CONTEXT *l2 = (ENTROPY_CONTEXT *) (x->e_mbd.left_context + 2); + ENTROPY_CONTEXT *l3 = (ENTROPY_CONTEXT *) (x->e_mbd.left_context + 3); + ENTROPY_CONTEXT ta[2], tl[2]; + int n; + + ta[0] = (a[0] + a[1] + a[2] + a[3] + a1[0] + a1[1] + a1[2] + a1[3]) != 0; + ta[1] = (a2[0] + a2[1] + a2[2] + a2[3] + a3[0] + a3[1] + a3[2] + a3[3]) != 0; + tl[0] = (l[0] + l[1] + l[2] + l[3] + l1[0] + l1[1] + l1[2] + l1[3]) != 0; + tl[1] = (l2[0] + l2[1] + l2[2] + l2[3] + l3[0] + l3[1] + l3[2] + l3[3]) != 0; + for (n = 0; n < 4; n++) { + const int x_idx = n & 1, y_idx = n >> 1; + + optimize_b(x, n * 64, PLANE_TYPE_Y_WITH_DC, x->e_mbd.block[0].dequant, + ta + x_idx, tl + y_idx, TX_32X32); + } +} + +void vp9_optimize_sb64y_16x16(MACROBLOCK *x) { + ENTROPY_CONTEXT *a = (ENTROPY_CONTEXT *) x->e_mbd.above_context; + ENTROPY_CONTEXT *a1 = (ENTROPY_CONTEXT *) (x->e_mbd.above_context + 1); + ENTROPY_CONTEXT *a2 = (ENTROPY_CONTEXT *) (x->e_mbd.above_context + 2); + ENTROPY_CONTEXT *a3 = (ENTROPY_CONTEXT *) (x->e_mbd.above_context + 3); + ENTROPY_CONTEXT *l = (ENTROPY_CONTEXT *) x->e_mbd.left_context; + ENTROPY_CONTEXT *l1 = (ENTROPY_CONTEXT *) (x->e_mbd.left_context + 1); + ENTROPY_CONTEXT *l2 = (ENTROPY_CONTEXT *) (x->e_mbd.left_context + 2); + ENTROPY_CONTEXT *l3 = (ENTROPY_CONTEXT *) (x->e_mbd.left_context + 3); + ENTROPY_CONTEXT ta[4], 
tl[4]; + int n; + + ta[0] = (a[0] + a[1] + a[2] + a[3]) != 0; + ta[1] = (a1[0] + a1[1] + a1[2] + a1[3]) != 0; + ta[2] = (a2[0] + a2[1] + a2[2] + a2[3]) != 0; + ta[3] = (a3[0] + a3[1] + a3[2] + a3[3]) != 0; + tl[0] = (l[0] + l[1] + l[2] + l[3]) != 0; + tl[1] = (l1[0] + l1[1] + l1[2] + l1[3]) != 0; + tl[2] = (l2[0] + l2[1] + l2[2] + l2[3]) != 0; + tl[3] = (l3[0] + l3[1] + l3[2] + l3[3]) != 0; + for (n = 0; n < 16; n++) { + const int x_idx = n & 3, y_idx = n >> 2; + + optimize_b(x, n * 16, PLANE_TYPE_Y_WITH_DC, x->e_mbd.block[0].dequant, + ta + x_idx, tl + y_idx, TX_16X16); + } +} + +void vp9_optimize_sb64y_8x8(MACROBLOCK *x) { + ENTROPY_CONTEXT *a = (ENTROPY_CONTEXT *) x->e_mbd.above_context; + ENTROPY_CONTEXT *a1 = (ENTROPY_CONTEXT *) (x->e_mbd.above_context + 1); + ENTROPY_CONTEXT *a2 = (ENTROPY_CONTEXT *) (x->e_mbd.above_context + 2); + ENTROPY_CONTEXT *a3 = (ENTROPY_CONTEXT *) (x->e_mbd.above_context + 3); + ENTROPY_CONTEXT *l = (ENTROPY_CONTEXT *) x->e_mbd.left_context; + ENTROPY_CONTEXT *l1 = (ENTROPY_CONTEXT *) (x->e_mbd.left_context + 1); + ENTROPY_CONTEXT *l2 = (ENTROPY_CONTEXT *) (x->e_mbd.left_context + 2); + ENTROPY_CONTEXT *l3 = (ENTROPY_CONTEXT *) (x->e_mbd.left_context + 3); + ENTROPY_CONTEXT ta[8], tl[8]; + int n; + + ta[0] = (a[0] + a[1]) != 0; + ta[1] = (a[2] + a[3]) != 0; + ta[2] = (a1[0] + a1[1]) != 0; + ta[3] = (a1[2] + a1[3]) != 0; + ta[4] = (a2[0] + a2[1]) != 0; + ta[5] = (a2[2] + a2[3]) != 0; + ta[6] = (a3[0] + a3[1]) != 0; + ta[7] = (a3[2] + a3[3]) != 0; + tl[0] = (l[0] + l[1]) != 0; + tl[1] = (l[2] + l[3]) != 0; + tl[2] = (l1[0] + l1[1]) != 0; + tl[3] = (l1[2] + l1[3]) != 0; + tl[4] = (l2[0] + l2[1]) != 0; + tl[5] = (l2[2] + l2[3]) != 0; + tl[6] = (l3[0] + l3[1]) != 0; + tl[7] = (l3[2] + l3[3]) != 0; + for (n = 0; n < 64; n++) { + const int x_idx = n & 7, y_idx = n >> 3; + + optimize_b(x, n * 4, PLANE_TYPE_Y_WITH_DC, x->e_mbd.block[0].dequant, + ta + x_idx, tl + y_idx, TX_8X8); + } +} + +void vp9_optimize_sb64y_4x4(MACROBLOCK *x) { + 
ENTROPY_CONTEXT ta[16], tl[16]; + int n; + + vpx_memcpy(ta, x->e_mbd.above_context, 4 * sizeof(ENTROPY_CONTEXT)); + vpx_memcpy(ta + 4, x->e_mbd.above_context + 1, 4 * sizeof(ENTROPY_CONTEXT)); + vpx_memcpy(ta + 8, x->e_mbd.above_context + 2, 4 * sizeof(ENTROPY_CONTEXT)); + vpx_memcpy(ta + 12, x->e_mbd.above_context + 3, 4 * sizeof(ENTROPY_CONTEXT)); + vpx_memcpy(tl, x->e_mbd.left_context, 4 * sizeof(ENTROPY_CONTEXT)); + vpx_memcpy(tl + 4, x->e_mbd.left_context + 1, 4 * sizeof(ENTROPY_CONTEXT)); + vpx_memcpy(tl + 8, x->e_mbd.left_context + 2, 4 * sizeof(ENTROPY_CONTEXT)); + vpx_memcpy(tl + 12, x->e_mbd.left_context + 3, 4 * sizeof(ENTROPY_CONTEXT)); + for (n = 0; n < 256; n++) { + const int x_idx = n & 15, y_idx = n >> 4; + + optimize_b(x, n, PLANE_TYPE_Y_WITH_DC, x->e_mbd.block[0].dequant, + ta + x_idx, tl + y_idx, TX_4X4); + } +} + +void vp9_optimize_sb64uv_32x32(MACROBLOCK *x) { + ENTROPY_CONTEXT *ta = (ENTROPY_CONTEXT *) x->e_mbd.above_context; + ENTROPY_CONTEXT *tl = (ENTROPY_CONTEXT *) x->e_mbd.left_context; + ENTROPY_CONTEXT *a, *l, *a1, *l1, *a2, *l2, *a3, *l3, a_ec, l_ec; + int b; + + for (b = 256; b < 384; b += 64) { + const int cidx = b >= 320 ? 
20 : 16; + a = ta + vp9_block2above_sb64[TX_32X32][b]; + l = tl + vp9_block2left_sb64[TX_32X32][b]; + a1 = a + sizeof(ENTROPY_CONTEXT_PLANES) / sizeof(ENTROPY_CONTEXT); + l1 = l + sizeof(ENTROPY_CONTEXT_PLANES) / sizeof(ENTROPY_CONTEXT); + a2 = a + 2 * sizeof(ENTROPY_CONTEXT_PLANES) / sizeof(ENTROPY_CONTEXT); + l2 = l + 2 * sizeof(ENTROPY_CONTEXT_PLANES) / sizeof(ENTROPY_CONTEXT); + a3 = a + 3 * sizeof(ENTROPY_CONTEXT_PLANES) / sizeof(ENTROPY_CONTEXT); + l3 = l + 3 * sizeof(ENTROPY_CONTEXT_PLANES) / sizeof(ENTROPY_CONTEXT); + a_ec = (a[0] + a[1] + a1[0] + a1[1] + a2[0] + a2[1] + a3[0] + a3[1]) != 0; + l_ec = (l[0] + l[1] + l1[0] + l1[1] + l2[0] + l2[1] + l3[0] + l3[1]) != 0; + optimize_b(x, b, PLANE_TYPE_UV, x->e_mbd.block[cidx].dequant, + &a_ec, &l_ec, TX_32X32); + } +} + +void vp9_optimize_sb64uv_16x16(MACROBLOCK *x) { + ENTROPY_CONTEXT_PLANES t_above[4], t_left[4]; + ENTROPY_CONTEXT *ta = (ENTROPY_CONTEXT *) t_above; + ENTROPY_CONTEXT *tl = (ENTROPY_CONTEXT *) t_left; + ENTROPY_CONTEXT *a, *l, *a1, *l1, above_ec, left_ec; + int b; + + vpx_memcpy(t_above, x->e_mbd.above_context, sizeof(t_above)); + vpx_memcpy(t_left, x->e_mbd.left_context, sizeof(t_left)); + for (b = 256; b < 384; b += 16) { + const int cidx = b >= 320 ? 
20 : 16; + a = ta + vp9_block2above_sb64[TX_16X16][b]; + l = tl + vp9_block2left_sb64[TX_16X16][b]; + a1 = a + sizeof(ENTROPY_CONTEXT_PLANES) / sizeof(ENTROPY_CONTEXT); + l1 = l + sizeof(ENTROPY_CONTEXT_PLANES) / sizeof(ENTROPY_CONTEXT); + above_ec = (a[0] + a[1] + a1[0] + a1[1]) != 0; + left_ec = (l[0] + l[1] + l1[0] + l1[1]) != 0; + optimize_b(x, b, PLANE_TYPE_UV, x->e_mbd.block[cidx].dequant, + &above_ec, &left_ec, TX_16X16); + a[0] = a[1] = a1[0] = a1[1] = above_ec; + l[0] = l[1] = l1[0] = l1[1] = left_ec; + } +} + +void vp9_optimize_sb64uv_8x8(MACROBLOCK *x) { + ENTROPY_CONTEXT_PLANES t_above[4], t_left[4]; + ENTROPY_CONTEXT *ta = (ENTROPY_CONTEXT *) t_above; + ENTROPY_CONTEXT *tl = (ENTROPY_CONTEXT *) t_left; + ENTROPY_CONTEXT *a, *l, above_ec, left_ec; + int b; + + vpx_memcpy(t_above, x->e_mbd.above_context, sizeof(t_above)); + vpx_memcpy(t_left, x->e_mbd.left_context, sizeof(t_left)); + for (b = 256; b < 384; b += 4) { + const int cidx = b >= 320 ? 20 : 16; + a = ta + vp9_block2above_sb64[TX_8X8][b]; + l = tl + vp9_block2left_sb64[TX_8X8][b]; + above_ec = (a[0] + a[1]) != 0; + left_ec = (l[0] + l[1]) != 0; + optimize_b(x, b, PLANE_TYPE_UV, x->e_mbd.block[cidx].dequant, + &above_ec, &left_ec, TX_8X8); + a[0] = a[1] = above_ec; + l[0] = l[1] = left_ec; + } +} + +void vp9_optimize_sb64uv_4x4(MACROBLOCK *x) { + ENTROPY_CONTEXT_PLANES t_above[4], t_left[4]; + ENTROPY_CONTEXT *ta = (ENTROPY_CONTEXT *) t_above; + ENTROPY_CONTEXT *tl = (ENTROPY_CONTEXT *) t_left; + ENTROPY_CONTEXT *a, *l; + int b; + + vpx_memcpy(t_above, x->e_mbd.above_context, sizeof(t_above)); + vpx_memcpy(t_left, x->e_mbd.left_context, sizeof(t_left)); + for (b = 256; b < 384; b++) { + const int cidx = b >= 320 ? 
20 : 16; + a = ta + vp9_block2above_sb64[TX_4X4][b]; + l = tl + vp9_block2left_sb64[TX_4X4][b]; + optimize_b(x, b, PLANE_TYPE_UV, x->e_mbd.block[cidx].dequant, + a, l, TX_4X4); + } +} + void vp9_fidct_mb(MACROBLOCK *x) { MACROBLOCKD *const xd = &x->e_mbd; TX_SIZE tx_size = xd->mode_info_context->mbmi.txfm_size; diff --git a/vp9/encoder/vp9_encodemb.h b/vp9/encoder/vp9_encodemb.h index 6356df215..917cf8b2a 100644 --- a/vp9/encoder/vp9_encodemb.h +++ b/vp9/encoder/vp9_encodemb.h @@ -35,7 +35,6 @@ void vp9_encode_inter16x16y(MACROBLOCK *x, int mb_row, int mb_col); void vp9_transform_mb_8x8(MACROBLOCK *mb); void vp9_transform_mby_8x8(MACROBLOCK *x); void vp9_transform_mbuv_8x8(MACROBLOCK *x); -void vp9_build_dcblock_8x8(MACROBLOCK *b); void vp9_optimize_mby_8x8(MACROBLOCK *x); void vp9_optimize_mbuv_8x8(MACROBLOCK *x); @@ -44,7 +43,36 @@ void vp9_transform_mby_16x16(MACROBLOCK *x); void vp9_optimize_mby_16x16(MACROBLOCK *x); void vp9_transform_sby_32x32(MACROBLOCK *x); +void vp9_optimize_sby_32x32(MACROBLOCK *x); +void vp9_transform_sby_16x16(MACROBLOCK *x); +void vp9_optimize_sby_16x16(MACROBLOCK *x); +void vp9_transform_sby_8x8(MACROBLOCK *x); +void vp9_optimize_sby_8x8(MACROBLOCK *x); +void vp9_transform_sby_4x4(MACROBLOCK *x); +void vp9_optimize_sby_4x4(MACROBLOCK *x); void vp9_transform_sbuv_16x16(MACROBLOCK *x); +void vp9_optimize_sbuv_16x16(MACROBLOCK *x); +void vp9_transform_sbuv_8x8(MACROBLOCK *x); +void vp9_optimize_sbuv_8x8(MACROBLOCK *x); +void vp9_transform_sbuv_4x4(MACROBLOCK *x); +void vp9_optimize_sbuv_4x4(MACROBLOCK *x); + +void vp9_transform_sb64y_32x32(MACROBLOCK *x); +void vp9_optimize_sb64y_32x32(MACROBLOCK *x); +void vp9_transform_sb64y_16x16(MACROBLOCK *x); +void vp9_optimize_sb64y_16x16(MACROBLOCK *x); +void vp9_transform_sb64y_8x8(MACROBLOCK *x); +void vp9_optimize_sb64y_8x8(MACROBLOCK *x); +void vp9_transform_sb64y_4x4(MACROBLOCK *x); +void vp9_optimize_sb64y_4x4(MACROBLOCK *x); +void vp9_transform_sb64uv_32x32(MACROBLOCK *x); +void 
vp9_optimize_sb64uv_32x32(MACROBLOCK *x); +void vp9_transform_sb64uv_16x16(MACROBLOCK *x); +void vp9_optimize_sb64uv_16x16(MACROBLOCK *x); +void vp9_transform_sb64uv_8x8(MACROBLOCK *x); +void vp9_optimize_sb64uv_8x8(MACROBLOCK *x); +void vp9_transform_sb64uv_4x4(MACROBLOCK *x); +void vp9_optimize_sb64uv_4x4(MACROBLOCK *x); void vp9_fidct_mb(MACROBLOCK *x); @@ -63,5 +91,11 @@ void vp9_subtract_sbuv_s_c(int16_t *diff, const uint8_t *usrc, const uint8_t *vsrc, int src_stride, const uint8_t *upred, const uint8_t *vpred, int dst_stride); +void vp9_subtract_sb64y_s_c(int16_t *diff, const uint8_t *src, int src_stride, + const uint8_t *pred, int dst_stride); +void vp9_subtract_sb64uv_s_c(int16_t *diff, const uint8_t *usrc, + const uint8_t *vsrc, int src_stride, + const uint8_t *upred, + const uint8_t *vpred, int dst_stride); #endif // VP9_ENCODER_VP9_ENCODEMB_H_ diff --git a/vp9/encoder/vp9_onyx_int.h b/vp9/encoder/vp9_onyx_int.h index 02a371964..50780d085 100644 --- a/vp9/encoder/vp9_onyx_int.h +++ b/vp9/encoder/vp9_onyx_int.h @@ -92,7 +92,7 @@ typedef struct { vp9_coeff_probs coef_probs_4x4[BLOCK_TYPES]; vp9_coeff_probs coef_probs_8x8[BLOCK_TYPES]; vp9_coeff_probs coef_probs_16x16[BLOCK_TYPES]; - vp9_coeff_probs coef_probs_32x32[BLOCK_TYPES_32X32]; + vp9_coeff_probs coef_probs_32x32[BLOCK_TYPES]; vp9_prob sb_ymode_prob[VP9_I32X32_MODES - 1]; vp9_prob ymode_prob[VP9_YMODES - 1]; /* interframe intra mode probs */ @@ -476,9 +476,9 @@ typedef struct VP9_COMP { vp9_coeff_probs frame_coef_probs_16x16[BLOCK_TYPES]; vp9_coeff_stats frame_branch_ct_16x16[BLOCK_TYPES]; - vp9_coeff_count coef_counts_32x32[BLOCK_TYPES_32X32]; - vp9_coeff_probs frame_coef_probs_32x32[BLOCK_TYPES_32X32]; - vp9_coeff_stats frame_branch_ct_32x32[BLOCK_TYPES_32X32]; + vp9_coeff_count coef_counts_32x32[BLOCK_TYPES]; + vp9_coeff_probs frame_coef_probs_32x32[BLOCK_TYPES]; + vp9_coeff_stats frame_branch_ct_32x32[BLOCK_TYPES]; int gfu_boost; int last_boost; diff --git a/vp9/encoder/vp9_quantize.c 
b/vp9/encoder/vp9_quantize.c index 399e8ecda..75f22fac0 100644 --- a/vp9/encoder/vp9_quantize.c +++ b/vp9/encoder/vp9_quantize.c @@ -344,39 +344,301 @@ void vp9_regular_quantize_b_16x16(MACROBLOCK *mb, int b_idx) { } void vp9_quantize_sby_32x32(MACROBLOCK *x) { - MACROBLOCKD *xd = &x->e_mbd; - BLOCK *b = &x->block[0]; - BLOCKD *d = &xd->block[0]; + MACROBLOCKD *const xd = &x->e_mbd; + BLOCK *const b = &x->block[0]; + BLOCKD *const d = &xd->block[0]; quantize(b->zrun_zbin_boost, - x->sb_coeff_data.coeff, + x->coeff, 1024, b->skip_block, b->zbin, b->round, b->quant, b->quant_shift, - xd->sb_coeff_data.qcoeff, - xd->sb_coeff_data.dqcoeff, + xd->qcoeff, + xd->dqcoeff, d->dequant, b->zbin_extra, &xd->eobs[0], vp9_default_zig_zag1d_32x32, 2); } +void vp9_quantize_sby_16x16(MACROBLOCK *x) { + MACROBLOCKD *const xd = &x->e_mbd; + BLOCK *const b = &x->block[0]; + BLOCKD *const d = &xd->block[0]; + int n; + + for (n = 0; n < 4; n++) + quantize(b->zrun_zbin_boost, + x->coeff + n * 256, + 256, b->skip_block, + b->zbin, + b->round, b->quant, b->quant_shift, + xd->qcoeff + n * 256, + xd->dqcoeff + n * 256, + d->dequant, + b->zbin_extra, + &xd->eobs[n * 16], + vp9_default_zig_zag1d_16x16, 1); +} + +void vp9_quantize_sby_8x8(MACROBLOCK *x) { + MACROBLOCKD *const xd = &x->e_mbd; + BLOCK *const b = &x->block[0]; + BLOCKD *const d = &xd->block[0]; + int n; + + for (n = 0; n < 16; n++) + quantize(b->zrun_zbin_boost, + x->coeff + n * 64, + 64, b->skip_block, + b->zbin, + b->round, b->quant, b->quant_shift, + xd->qcoeff + n * 64, + xd->dqcoeff + n * 64, + d->dequant, + b->zbin_extra, + &xd->eobs[n * 4], + vp9_default_zig_zag1d_8x8, 1); +} + +void vp9_quantize_sby_4x4(MACROBLOCK *x) { + MACROBLOCKD *const xd = &x->e_mbd; + BLOCK *const b = &x->block[0]; + BLOCKD *const d = &xd->block[0]; + int n; + + for (n = 0; n < 64; n++) + quantize(b->zrun_zbin_boost, + x->coeff + n * 16, + 16, b->skip_block, + b->zbin, + b->round, b->quant, b->quant_shift, + xd->qcoeff + n * 16, + xd->dqcoeff + n * 
16, + d->dequant, + b->zbin_extra, + &xd->eobs[n], + vp9_default_zig_zag1d_4x4, 1); +} + void vp9_quantize_sbuv_16x16(MACROBLOCK *x) { int i; - MACROBLOCKD *xd = &x->e_mbd; + MACROBLOCKD *const xd = &x->e_mbd; - for (i = 16; i < 24; i += 4) - quantize(x->block[i].zrun_zbin_boost, - x->sb_coeff_data.coeff + 1024 + (i - 16) * 64, - 256, x->block[i].skip_block, - x->block[i].zbin, - x->block[i].round, x->block[0].quant, x->block[i].quant_shift, - xd->sb_coeff_data.qcoeff + 1024 + (i - 16) * 64, - xd->sb_coeff_data.dqcoeff + 1024 + (i - 16) * 64, - xd->block[i].dequant, - x->block[i].zbin_extra, + for (i = 64; i < 96; i += 16) { + int cidx = i < 80 ? 16 : 20; + quantize(x->block[cidx].zrun_zbin_boost, + x->coeff + i * 16, + 256, x->block[cidx].skip_block, + x->block[cidx].zbin, x->block[cidx].round, + x->block[cidx].quant, x->block[cidx].quant_shift, + xd->qcoeff + i * 16, + xd->dqcoeff + i * 16, + xd->block[cidx].dequant, + x->block[cidx].zbin_extra, &xd->eobs[i], vp9_default_zig_zag1d_16x16, 1); + } +} + +void vp9_quantize_sbuv_8x8(MACROBLOCK *x) { + int i; + MACROBLOCKD *const xd = &x->e_mbd; + + for (i = 64; i < 96; i += 4) { + int cidx = i < 80 ? 16 : 20; + quantize(x->block[cidx].zrun_zbin_boost, + x->coeff + i * 16, + 64, x->block[cidx].skip_block, + x->block[cidx].zbin, x->block[cidx].round, + x->block[cidx].quant, x->block[cidx].quant_shift, + xd->qcoeff + i * 16, + xd->dqcoeff + i * 16, + xd->block[cidx].dequant, + x->block[cidx].zbin_extra, + &xd->eobs[i], + vp9_default_zig_zag1d_8x8, 1); + } +} + +void vp9_quantize_sbuv_4x4(MACROBLOCK *x) { + int i; + MACROBLOCKD *const xd = &x->e_mbd; + + for (i = 64; i < 96; i++) { + int cidx = i < 80 ? 
16 : 20; + quantize(x->block[cidx].zrun_zbin_boost, + x->coeff + i * 16, + 16, x->block[cidx].skip_block, + x->block[cidx].zbin, x->block[cidx].round, + x->block[cidx].quant, x->block[cidx].quant_shift, + xd->qcoeff + i * 16, + xd->dqcoeff + i * 16, + xd->block[cidx].dequant, + x->block[cidx].zbin_extra, + &xd->eobs[i], + vp9_default_zig_zag1d_4x4, 1); + } +} + +void vp9_quantize_sb64y_32x32(MACROBLOCK *x) { + MACROBLOCKD *const xd = &x->e_mbd; + BLOCK *const b = &x->block[0]; + BLOCKD *const d = &xd->block[0]; + int n; + + for (n = 0; n < 4; n++) + quantize(b->zrun_zbin_boost, + x->coeff + n * 1024, + 1024, b->skip_block, + b->zbin, + b->round, b->quant, b->quant_shift, + xd->qcoeff + n * 1024, + xd->dqcoeff + n * 1024, + d->dequant, + b->zbin_extra, + &xd->eobs[n * 64], + vp9_default_zig_zag1d_32x32, 2); +} + +void vp9_quantize_sb64y_16x16(MACROBLOCK *x) { + MACROBLOCKD *const xd = &x->e_mbd; + BLOCK *const b = &x->block[0]; + BLOCKD *const d = &xd->block[0]; + int n; + + for (n = 0; n < 16; n++) + quantize(b->zrun_zbin_boost, + x->coeff + n * 256, + 256, b->skip_block, + b->zbin, + b->round, b->quant, b->quant_shift, + xd->qcoeff + n * 256, + xd->dqcoeff + n * 256, + d->dequant, + b->zbin_extra, + &xd->eobs[n * 16], + vp9_default_zig_zag1d_16x16, 1); +} + +void vp9_quantize_sb64y_8x8(MACROBLOCK *x) { + MACROBLOCKD *const xd = &x->e_mbd; + BLOCK *const b = &x->block[0]; + BLOCKD *const d = &xd->block[0]; + int n; + + for (n = 0; n < 64; n++) + quantize(b->zrun_zbin_boost, + x->coeff + n * 64, + 64, b->skip_block, + b->zbin, + b->round, b->quant, b->quant_shift, + xd->qcoeff + n * 64, + xd->dqcoeff + n * 64, + d->dequant, + b->zbin_extra, + &xd->eobs[n * 4], + vp9_default_zig_zag1d_8x8, 1); +} + +void vp9_quantize_sb64y_4x4(MACROBLOCK *x) { + MACROBLOCKD *const xd = &x->e_mbd; + BLOCK *const b = &x->block[0]; + BLOCKD *const d = &xd->block[0]; + int n; + + for (n = 0; n < 256; n++) + quantize(b->zrun_zbin_boost, + x->coeff + n * 16, + 16, b->skip_block, + b->zbin, 
+ b->round, b->quant, b->quant_shift, + xd->qcoeff + n * 16, + xd->dqcoeff + n * 16, + d->dequant, + b->zbin_extra, + &xd->eobs[n], + vp9_default_zig_zag1d_4x4, 1); +} + +void vp9_quantize_sb64uv_32x32(MACROBLOCK *x) { + int i; + MACROBLOCKD *const xd = &x->e_mbd; + + for (i = 256; i < 384; i += 64) { + int cidx = i < 320 ? 16 : 20; + quantize(x->block[cidx].zrun_zbin_boost, + x->coeff + i * 16, + 1024, x->block[cidx].skip_block, + x->block[cidx].zbin, x->block[cidx].round, + x->block[cidx].quant, x->block[cidx].quant_shift, + xd->qcoeff + i * 16, + xd->dqcoeff + i * 16, + xd->block[cidx].dequant, + x->block[cidx].zbin_extra, + &xd->eobs[i], + vp9_default_zig_zag1d_32x32, 2); + } +} + +void vp9_quantize_sb64uv_16x16(MACROBLOCK *x) { + int i; + MACROBLOCKD *const xd = &x->e_mbd; + + for (i = 256; i < 384; i += 16) { + int cidx = i < 320 ? 16 : 20; + quantize(x->block[cidx].zrun_zbin_boost, + x->coeff + i * 16, + 256, x->block[cidx].skip_block, + x->block[cidx].zbin, x->block[cidx].round, + x->block[cidx].quant, x->block[cidx].quant_shift, + xd->qcoeff + i * 16, + xd->dqcoeff + i * 16, + xd->block[cidx].dequant, + x->block[cidx].zbin_extra, + &xd->eobs[i], + vp9_default_zig_zag1d_16x16, 1); + } +} + +void vp9_quantize_sb64uv_8x8(MACROBLOCK *x) { + int i; + MACROBLOCKD *const xd = &x->e_mbd; + + for (i = 256; i < 384; i += 4) { + int cidx = i < 320 ? 16 : 20; + quantize(x->block[cidx].zrun_zbin_boost, + x->coeff + i * 16, + 64, x->block[cidx].skip_block, + x->block[cidx].zbin, x->block[cidx].round, + x->block[cidx].quant, x->block[cidx].quant_shift, + xd->qcoeff + i * 16, + xd->dqcoeff + i * 16, + xd->block[cidx].dequant, + x->block[cidx].zbin_extra, + &xd->eobs[i], + vp9_default_zig_zag1d_8x8, 1); + } +} + +void vp9_quantize_sb64uv_4x4(MACROBLOCK *x) { + int i; + MACROBLOCKD *const xd = &x->e_mbd; + + for (i = 256; i < 384; i++) { + int cidx = i < 320 ? 
16 : 20; + quantize(x->block[cidx].zrun_zbin_boost, + x->coeff + i * 16, + 16, x->block[cidx].skip_block, + x->block[cidx].zbin, x->block[cidx].round, + x->block[cidx].quant, x->block[cidx].quant_shift, + xd->qcoeff + i * 16, + xd->dqcoeff + i * 16, + xd->block[cidx].dequant, + x->block[cidx].zbin_extra, + &xd->eobs[i], + vp9_default_zig_zag1d_4x4, 1); + } } /* quantize_b_pair function pointer in MACROBLOCK structure is set to one of diff --git a/vp9/encoder/vp9_quantize.h b/vp9/encoder/vp9_quantize.h index d338e620a..32eb05a11 100644 --- a/vp9/encoder/vp9_quantize.h +++ b/vp9/encoder/vp9_quantize.h @@ -74,7 +74,21 @@ extern prototype_quantize_block(vp9_quantize_quantb_16x16); extern prototype_quantize_mb(vp9_quantize_mby_16x16); void vp9_quantize_sby_32x32(MACROBLOCK *x); +void vp9_quantize_sby_16x16(MACROBLOCK *x); +void vp9_quantize_sby_8x8(MACROBLOCK *x); +void vp9_quantize_sby_4x4(MACROBLOCK *x); void vp9_quantize_sbuv_16x16(MACROBLOCK *x); +void vp9_quantize_sbuv_8x8(MACROBLOCK *x); +void vp9_quantize_sbuv_4x4(MACROBLOCK *x); + +void vp9_quantize_sb64y_32x32(MACROBLOCK *x); +void vp9_quantize_sb64y_16x16(MACROBLOCK *x); +void vp9_quantize_sb64y_8x8(MACROBLOCK *x); +void vp9_quantize_sb64y_4x4(MACROBLOCK *x); +void vp9_quantize_sb64uv_32x32(MACROBLOCK *x); +void vp9_quantize_sb64uv_16x16(MACROBLOCK *x); +void vp9_quantize_sb64uv_8x8(MACROBLOCK *x); +void vp9_quantize_sb64uv_4x4(MACROBLOCK *x); struct VP9_COMP; diff --git a/vp9/encoder/vp9_rdopt.c b/vp9/encoder/vp9_rdopt.c index 59e33a464..c5b3e3a16 100644 --- a/vp9/encoder/vp9_rdopt.c +++ b/vp9/encoder/vp9_rdopt.c @@ -275,7 +275,7 @@ void vp9_initialize_rd_consts(VP9_COMP *cpi, int QIndex) { fill_token_costs(cpi->mb.token_costs[TX_16X16], cpi->common.fc.coef_probs_16x16, BLOCK_TYPES); fill_token_costs(cpi->mb.token_costs[TX_32X32], - cpi->common.fc.coef_probs_32x32, BLOCK_TYPES_32X32); + cpi->common.fc.coef_probs_32x32, BLOCK_TYPES); /*rough estimate for costing*/ cpi->common.kf_ymode_probs_index = 
cpi->common.base_qindex >> 4; @@ -382,25 +382,27 @@ int vp9_uvsse(MACROBLOCK *x) { } static INLINE int cost_coeffs(MACROBLOCK *mb, - BLOCKD *b, PLANE_TYPE type, + int ib, PLANE_TYPE type, ENTROPY_CONTEXT *a, ENTROPY_CONTEXT *l, TX_SIZE tx_size) { - int pt; MACROBLOCKD *const xd = &mb->e_mbd; - const int ib = (int)(b - xd->block); + MB_MODE_INFO *mbmi = &xd->mode_info_context->mbmi; + const BLOCK_SIZE_TYPE sb_type = mbmi->sb_type; + int pt; const int eob = xd->eobs[ib]; int c = 0; int cost = 0, seg_eob; - const int segment_id = xd->mode_info_context->mbmi.segment_id; + const int segment_id = mbmi->segment_id; const int *scan; - int16_t *qcoeff_ptr = b->qcoeff; - const int ref = xd->mode_info_context->mbmi.ref_frame != INTRA_FRAME; - const TX_TYPE tx_type = (type == PLANE_TYPE_Y_WITH_DC) ? - get_tx_type(xd, b) : DCT_DCT; + const int16_t *qcoeff_ptr = xd->qcoeff + ib * 16; + const int ref = mbmi->ref_frame != INTRA_FRAME; + const TX_TYPE tx_type = (sb_type == BLOCK_SIZE_MB16X16 && + type == PLANE_TYPE_Y_WITH_DC) ? 
+ get_tx_type(xd, &xd->block[ib]) : DCT_DCT; unsigned int (*token_costs)[PREV_COEF_CONTEXTS][MAX_ENTROPY_TOKENS] = mb->token_costs[tx_size][type][ref]; - ENTROPY_CONTEXT a_ec = *a, l_ec = *l; + ENTROPY_CONTEXT a_ec, l_ec; ENTROPY_CONTEXT *const a1 = a + sizeof(ENTROPY_CONTEXT_PLANES)/sizeof(ENTROPY_CONTEXT); ENTROPY_CONTEXT *const l1 = l + @@ -408,6 +410,8 @@ static INLINE int cost_coeffs(MACROBLOCK *mb, switch (tx_size) { case TX_4X4: + a_ec = *a; + l_ec = *l; scan = vp9_default_zig_zag1d_4x4; seg_eob = 16; if (type == PLANE_TYPE_Y_WITH_DC) { @@ -428,8 +432,6 @@ static INLINE int cost_coeffs(MACROBLOCK *mb, scan = vp9_default_zig_zag1d_16x16; seg_eob = 256; if (type == PLANE_TYPE_UV) { - const int uv_idx = ib - 16; - qcoeff_ptr = xd->sb_coeff_data.qcoeff + 1024 + 64 * uv_idx; a_ec = (a[0] + a[1] + a1[0] + a1[1]) != 0; l_ec = (l[0] + l[1] + l1[0] + l1[1]) != 0; } else { @@ -440,11 +442,22 @@ static INLINE int cost_coeffs(MACROBLOCK *mb, case TX_32X32: scan = vp9_default_zig_zag1d_32x32; seg_eob = 1024; - qcoeff_ptr = xd->sb_coeff_data.qcoeff; - a_ec = (a[0] + a[1] + a[2] + a[3] + - a1[0] + a1[1] + a1[2] + a1[3]) != 0; - l_ec = (l[0] + l[1] + l[2] + l[3] + - l1[0] + l1[1] + l1[2] + l1[3]) != 0; + if (type == PLANE_TYPE_UV) { + ENTROPY_CONTEXT *a2, *a3, *l2, *l3; + a2 = a1 + sizeof(ENTROPY_CONTEXT_PLANES) / sizeof(ENTROPY_CONTEXT); + a3 = a2 + sizeof(ENTROPY_CONTEXT_PLANES) / sizeof(ENTROPY_CONTEXT); + l2 = l1 + sizeof(ENTROPY_CONTEXT_PLANES) / sizeof(ENTROPY_CONTEXT); + l3 = l2 + sizeof(ENTROPY_CONTEXT_PLANES) / sizeof(ENTROPY_CONTEXT); + a_ec = (a[0] + a[1] + a1[0] + a1[1] + + a2[0] + a2[1] + a3[0] + a3[1]) != 0; + l_ec = (l[0] + l[1] + l1[0] + l1[1] + + l2[0] + l2[1] + l3[0] + l3[1]) != 0; + } else { + a_ec = (a[0] + a[1] + a[2] + a[3] + + a1[0] + a1[1] + a1[2] + a1[3]) != 0; + l_ec = (l[0] + l[1] + l[2] + l[3] + + l1[0] + l1[1] + l1[2] + l1[3]) != 0; + } break; default: abort(); @@ -510,7 +523,7 @@ static int rdcost_mby_4x4(MACROBLOCK *mb, int backup) { } for (b 
= 0; b < 16; b++) - cost += cost_coeffs(mb, xd->block + b, PLANE_TYPE_Y_WITH_DC, + cost += cost_coeffs(mb, b, PLANE_TYPE_Y_WITH_DC, ta + vp9_block2above[TX_4X4][b], tl + vp9_block2left[TX_4X4][b], TX_4X4); @@ -553,7 +566,7 @@ static int rdcost_mby_8x8(MACROBLOCK *mb, int backup) { } for (b = 0; b < 16; b += 4) - cost += cost_coeffs(mb, xd->block + b, PLANE_TYPE_Y_WITH_DC, + cost += cost_coeffs(mb, b, PLANE_TYPE_Y_WITH_DC, ta + vp9_block2above[TX_8X8][b], tl + vp9_block2left[TX_8X8][b], TX_8X8); @@ -593,7 +606,7 @@ static int rdcost_mby_16x16(MACROBLOCK *mb, int backup) { tl = (ENTROPY_CONTEXT *)xd->left_context; } - cost = cost_coeffs(mb, xd->block, PLANE_TYPE_Y_WITH_DC, ta, tl, TX_16X16); + cost = cost_coeffs(mb, 0, PLANE_TYPE_Y_WITH_DC, ta, tl, TX_16X16); return cost; } @@ -743,7 +756,7 @@ static int rdcost_sby_32x32(MACROBLOCK *x, int backup) { tl = (ENTROPY_CONTEXT *) xd->left_context; } - return cost_coeffs(x, xd->block, PLANE_TYPE_Y_WITH_DC, ta, tl, TX_32X32); + return cost_coeffs(x, 0, PLANE_TYPE_Y_WITH_DC, ta, tl, TX_32X32); } static int vp9_sb_block_error_c(int16_t *coeff, int16_t *dqcoeff, @@ -763,9 +776,7 @@ static int vp9_sb_block_error_c(int16_t *coeff, int16_t *dqcoeff, static void super_block_yrd_32x32(MACROBLOCK *x, int *rate, int *distortion, int *skippable, int backup) { - SUPERBLOCK * const x_sb = &x->sb_coeff_data; - MACROBLOCKD * const xd = &x->e_mbd; - SUPERBLOCKD * const xd_sb = &xd->sb_coeff_data; + MACROBLOCKD *const xd = &x->e_mbd; #if DEBUG_ERROR int16_t out[1024]; #endif @@ -773,17 +784,17 @@ static void super_block_yrd_32x32(MACROBLOCK *x, vp9_transform_sby_32x32(x); vp9_quantize_sby_32x32(x); #if DEBUG_ERROR - vp9_short_idct32x32(xd_sb->dqcoeff, out, 64); + vp9_short_idct32x32(xd->dqcoeff, out, 64); #endif - *distortion = vp9_sb_block_error_c(x_sb->coeff, xd_sb->dqcoeff, 1024); + *distortion = vp9_sb_block_error_c(x->coeff, xd->dqcoeff, 1024); #if DEBUG_ERROR printf("IDCT/FDCT error 32x32: %d (d: %d)\n", - 
vp9_block_error_c(x_sb->src_diff, out, 1024), *distortion); + vp9_block_error_c(x->src_diff, out, 1024), *distortion); #endif *rate = rdcost_sby_32x32(x, backup); - *skippable = vp9_sby_is_skippable_32x32(&x->e_mbd); + *skippable = vp9_sby_is_skippable_32x32(xd); } static void super_block_yrd(VP9_COMP *cpi, @@ -807,7 +818,7 @@ static void super_block_yrd(VP9_COMP *cpi, s[n] = 1; } - vp9_subtract_sby_s_c(x->sb_coeff_data.src_diff, src, src_y_stride, + vp9_subtract_sby_s_c(x->src_diff, src, src_y_stride, dst, dst_y_stride); super_block_yrd_32x32(x, &r[TX_32X32][0], &d[TX_32X32], &s[TX_32X32], 1); @@ -896,7 +907,7 @@ static void super_block_64_yrd(VP9_COMP *cpi, xd->above_context = &t_above[TX_32X32][x_idx << 1]; xd->left_context = &t_left[TX_32X32][y_idx << 1]; - vp9_subtract_sby_s_c(x->sb_coeff_data.src_diff, + vp9_subtract_sby_s_c(x->src_diff, src + 32 * x_idx + 32 * y_idx * src_y_stride, src_y_stride, dst + 32 * x_idx + 32 * y_idx * dst_y_stride, @@ -1051,7 +1062,8 @@ static int64_t rd_pick_intra4x4block(VP9_COMP *cpi, MACROBLOCK *x, BLOCK *be, tempa = ta; templ = tl; - ratey = cost_coeffs(x, b, PLANE_TYPE_Y_WITH_DC, &tempa, &templ, TX_4X4); + ratey = cost_coeffs(x, b - xd->block, + PLANE_TYPE_Y_WITH_DC, &tempa, &templ, TX_4X4); rate += ratey; distortion = vp9_block_error(be->coeff, b->dqcoeff, 16) >> 2; @@ -1355,7 +1367,7 @@ static int64_t rd_pick_intra8x8block(VP9_COMP *cpi, MACROBLOCK *x, int ib, ta1 = ta0 + 1; tl1 = tl0 + 1; - rate_t = cost_coeffs(x, xd->block + idx, PLANE_TYPE_Y_WITH_DC, + rate_t = cost_coeffs(x, idx, PLANE_TYPE_Y_WITH_DC, ta0, tl0, TX_8X8); rate += rate_t; @@ -1388,12 +1400,12 @@ static int64_t rd_pick_intra8x8block(VP9_COMP *cpi, MACROBLOCK *x, int ib, x->quantize_b_4x4(x, ib + iblock[i]); } distortion += vp9_block_error_c(be->coeff, b->dqcoeff, 16 << do_two); - rate_t += cost_coeffs(x, b, PLANE_TYPE_Y_WITH_DC, + rate_t += cost_coeffs(x, ib + iblock[i], PLANE_TYPE_Y_WITH_DC, i&1 ? ta1 : ta0, i&2 ? 
tl1 : tl0, TX_4X4); if (do_two) { i++; - rate_t += cost_coeffs(x, b + 1, PLANE_TYPE_Y_WITH_DC, + rate_t += cost_coeffs(x, ib + iblock[i], PLANE_TYPE_Y_WITH_DC, i&1 ? ta1 : ta0, i&2 ? tl1 : tl0, TX_4X4); } @@ -1500,7 +1512,7 @@ static int rd_cost_mbuv_4x4(MACROBLOCK *mb, int backup) { } for (b = 16; b < 24; b++) - cost += cost_coeffs(mb, xd->block + b, PLANE_TYPE_UV, + cost += cost_coeffs(mb, b, PLANE_TYPE_UV, ta + vp9_block2above[TX_4X4][b], tl + vp9_block2left[TX_4X4][b], TX_4X4); @@ -1541,7 +1553,7 @@ static int rd_cost_mbuv_8x8(MACROBLOCK *mb, int backup) { } for (b = 16; b < 24; b += 4) - cost += cost_coeffs(mb, xd->block + b, PLANE_TYPE_UV, + cost += cost_coeffs(mb, b, PLANE_TYPE_UV, ta + vp9_block2above[TX_8X8][b], tl + vp9_block2left[TX_8X8][b], TX_8X8); @@ -1580,7 +1592,7 @@ static int rd_cost_sbuv_16x16(MACROBLOCK *x, int backup) { } for (b = 16; b < 24; b += 4) - cost += cost_coeffs(x, xd->block + b, PLANE_TYPE_UV, + cost += cost_coeffs(x, b * 4, PLANE_TYPE_UV, ta + vp9_block2above[TX_8X8][b], tl + vp9_block2left[TX_8X8][b], TX_16X16); @@ -1596,8 +1608,8 @@ static void rd_inter32x32_uv_16x16(MACROBLOCK *x, int *rate, vp9_quantize_sbuv_16x16(x); *rate = rd_cost_sbuv_16x16(x, backup); - *distortion = vp9_block_error_c(x->sb_coeff_data.coeff + 1024, - xd->sb_coeff_data.dqcoeff + 1024, 512) >> 2; + *distortion = vp9_block_error_c(x->coeff + 1024, + xd->dqcoeff + 1024, 512) >> 2; *skip = vp9_sbuv_is_skippable_16x16(xd); } @@ -1609,8 +1621,8 @@ static int64_t rd_inter32x32_uv(VP9_COMP *cpi, MACROBLOCK *x, int *rate, const uint8_t *vsrc = x->src.v_buffer, *vdst = xd->dst.v_buffer; int src_uv_stride = x->src.uv_stride, dst_uv_stride = xd->dst.uv_stride; - if (mbmi->txfm_size == TX_32X32) { - vp9_subtract_sbuv_s_c(x->sb_coeff_data.src_diff, + if (mbmi->txfm_size >= TX_16X16) { + vp9_subtract_sbuv_s_c(x->src_diff, usrc, vsrc, src_uv_stride, udst, vdst, dst_uv_stride); rd_inter32x32_uv_16x16(x, rate, distortion, skip, 1); @@ -1789,8 +1801,8 @@ static void 
super_block_uvrd(MACROBLOCK *x, const uint8_t *vsrc = x->src.v_buffer, *vdst = xd->dst.v_buffer; int src_uv_stride = x->src.uv_stride, dst_uv_stride = xd->dst.uv_stride; - if (mbmi->txfm_size == TX_32X32) { - vp9_subtract_sbuv_s_c(x->sb_coeff_data.src_diff, + if (mbmi->txfm_size >= TX_16X16) { + vp9_subtract_sbuv_s_c(x->src_diff, usrc, vsrc, src_uv_stride, udst, vdst, dst_uv_stride); rd_inter32x32_uv_16x16(x, rate, distortion, skippable, 1); @@ -1842,6 +1854,46 @@ static void super_block_uvrd(MACROBLOCK *x, } } +static int rd_cost_sb64uv_32x32(MACROBLOCK *x, int backup) { + int b; + int cost = 0; + MACROBLOCKD *const xd = &x->e_mbd; + ENTROPY_CONTEXT_PLANES t_above[4], t_left[4]; + ENTROPY_CONTEXT *ta, *tl; + + if (backup) { + vpx_memcpy(&t_above, xd->above_context, sizeof(ENTROPY_CONTEXT_PLANES) * 4); + vpx_memcpy(&t_left, xd->left_context, sizeof(ENTROPY_CONTEXT_PLANES) * 4); + + ta = (ENTROPY_CONTEXT *) &t_above; + tl = (ENTROPY_CONTEXT *) &t_left; + } else { + ta = (ENTROPY_CONTEXT *)xd->above_context; + tl = (ENTROPY_CONTEXT *)xd->left_context; + } + + for (b = 16; b < 24; b += 4) + cost += cost_coeffs(x, b * 16, PLANE_TYPE_UV, + ta + vp9_block2above[TX_8X8][b], + tl + vp9_block2left[TX_8X8][b], TX_32X32); + + return cost; +} + +static void rd_inter64x64_uv_32x32(MACROBLOCK *x, int *rate, + int *distortion, int *skip, + int backup) { + MACROBLOCKD *const xd = &x->e_mbd; + + vp9_transform_sb64uv_32x32(x); + vp9_quantize_sb64uv_32x32(x); + + *rate = rd_cost_sb64uv_32x32(x, backup); + *distortion = vp9_block_error_c(x->coeff + 4096, + xd->dqcoeff + 4096, 2048); + *skip = vp9_sb64uv_is_skippable_32x32(xd); +} + static void super_block_64_uvrd(MACROBLOCK *x, int *rate, int *distortion, @@ -1856,10 +1908,15 @@ static void super_block_64_uvrd(MACROBLOCK *x, ENTROPY_CONTEXT_PLANES *tl_orig = xd->left_context; int d = 0, r = 0, n, s = 1; + // FIXME not needed if tx=32x32 memcpy(t_above, xd->above_context, sizeof(t_above)); memcpy(t_left, xd->left_context, 
sizeof(t_left)); if (mbmi->txfm_size == TX_32X32) { + vp9_subtract_sb64uv_s_c(x->src_diff, usrc, vsrc, src_uv_stride, + udst, vdst, dst_uv_stride); + rd_inter64x64_uv_32x32(x, &r, &d, &s, 1); + } else if (mbmi->txfm_size == TX_16X16) { int n; *rate = 0; @@ -1867,7 +1924,7 @@ static void super_block_64_uvrd(MACROBLOCK *x, int x_idx = n & 1, y_idx = n >> 1; int r_tmp, d_tmp, s_tmp; - vp9_subtract_sbuv_s_c(x->sb_coeff_data.src_diff, + vp9_subtract_sbuv_s_c(x->src_diff, usrc + x_idx * 16 + y_idx * 16 * src_uv_stride, vsrc + x_idx * 16 + y_idx * 16 * src_uv_stride, src_uv_stride, @@ -2170,7 +2227,7 @@ static int64_t encode_inter_mb_segment(MACROBLOCK *x, x->quantize_b_4x4(x, i); thisdistortion = vp9_block_error(be->coeff, bd->dqcoeff, 16); *distortion += thisdistortion; - *labelyrate += cost_coeffs(x, bd, PLANE_TYPE_Y_WITH_DC, + *labelyrate += cost_coeffs(x, i, PLANE_TYPE_Y_WITH_DC, ta + vp9_block2above[TX_4X4][i], tl + vp9_block2left[TX_4X4][i], TX_4X4); } @@ -2233,10 +2290,10 @@ static int64_t encode_inter_mb_segment_8x8(MACROBLOCK *x, x->quantize_b_8x8(x, idx); thisdistortion = vp9_block_error_c(be2->coeff, bd2->dqcoeff, 64); otherdist += thisdistortion; - othercost += cost_coeffs(x, bd2, PLANE_TYPE_Y_WITH_DC, - tacp + vp9_block2above[TX_8X8][idx], - tlcp + vp9_block2left[TX_8X8][idx], - TX_8X8); + othercost += cost_coeffs(x, idx, PLANE_TYPE_Y_WITH_DC, + tacp + vp9_block2above[TX_8X8][idx], + tlcp + vp9_block2left[TX_8X8][idx], + TX_8X8); } for (j = 0; j < 4; j += 2) { bd = &xd->block[ib + iblock[j]]; @@ -2245,11 +2302,12 @@ static int64_t encode_inter_mb_segment_8x8(MACROBLOCK *x, x->quantize_b_4x4_pair(x, ib + iblock[j], ib + iblock[j] + 1); thisdistortion = vp9_block_error_c(be->coeff, bd->dqcoeff, 32); *distortion += thisdistortion; - *labelyrate += cost_coeffs(x, bd, PLANE_TYPE_Y_WITH_DC, + *labelyrate += cost_coeffs(x, ib + iblock[j], PLANE_TYPE_Y_WITH_DC, ta + vp9_block2above[TX_4X4][ib + iblock[j]], tl + vp9_block2left[TX_4X4][ib + iblock[j]], TX_4X4); - 
*labelyrate += cost_coeffs(x, bd + 1, PLANE_TYPE_Y_WITH_DC, + *labelyrate += cost_coeffs(x, ib + iblock[j] + 1, + PLANE_TYPE_Y_WITH_DC, ta + vp9_block2above[TX_4X4][ib + iblock[j] + 1], tl + vp9_block2left[TX_4X4][ib + iblock[j]], TX_4X4); @@ -2263,11 +2321,12 @@ static int64_t encode_inter_mb_segment_8x8(MACROBLOCK *x, x->quantize_b_4x4_pair(x, ib + iblock[j], ib + iblock[j]); thisdistortion = vp9_block_error_c(be->coeff, bd->dqcoeff, 32); otherdist += thisdistortion; - othercost += cost_coeffs(x, bd, PLANE_TYPE_Y_WITH_DC, + othercost += cost_coeffs(x, ib + iblock[j], PLANE_TYPE_Y_WITH_DC, tacp + vp9_block2above[TX_4X4][ib + iblock[j]], tlcp + vp9_block2left[TX_4X4][ib + iblock[j]], TX_4X4); - othercost += cost_coeffs(x, bd + 1, PLANE_TYPE_Y_WITH_DC, + othercost += cost_coeffs(x, ib + iblock[j] + 1, + PLANE_TYPE_Y_WITH_DC, tacp + vp9_block2above[TX_4X4][ib + iblock[j] + 1], tlcp + vp9_block2left[TX_4X4][ib + iblock[j]], TX_4X4); @@ -2277,7 +2336,7 @@ static int64_t encode_inter_mb_segment_8x8(MACROBLOCK *x, x->quantize_b_8x8(x, idx); thisdistortion = vp9_block_error_c(be2->coeff, bd2->dqcoeff, 64); *distortion += thisdistortion; - *labelyrate += cost_coeffs(x, bd2, PLANE_TYPE_Y_WITH_DC, + *labelyrate += cost_coeffs(x, idx, PLANE_TYPE_Y_WITH_DC, ta + vp9_block2above[TX_8X8][idx], tl + vp9_block2left[TX_8X8][idx], TX_8X8); } diff --git a/vp9/encoder/vp9_tokenize.c b/vp9/encoder/vp9_tokenize.c index 95a2e1227..d115fe80e 100644 --- a/vp9/encoder/vp9_tokenize.c +++ b/vp9/encoder/vp9_tokenize.c @@ -28,12 +28,12 @@ vp9_coeff_accum context_counters_4x4[BLOCK_TYPES]; vp9_coeff_accum context_counters_8x8[BLOCK_TYPES]; vp9_coeff_accum context_counters_16x16[BLOCK_TYPES]; -vp9_coeff_accum context_counters_32x32[BLOCK_TYPES_32X32]; +vp9_coeff_accum context_counters_32x32[BLOCK_TYPES]; extern vp9_coeff_stats tree_update_hist_4x4[BLOCK_TYPES]; extern vp9_coeff_stats tree_update_hist_8x8[BLOCK_TYPES]; extern vp9_coeff_stats tree_update_hist_16x16[BLOCK_TYPES]; -extern 
vp9_coeff_stats tree_update_hist_32x32[BLOCK_TYPES_32X32]; +extern vp9_coeff_stats tree_update_hist_32x32[BLOCK_TYPES]; #endif /* ENTROPY_STATS */ static TOKENVALUE dct_value_tokens[DCT_MAX_VALUE * 2]; @@ -101,37 +101,52 @@ static void tokenize_b(VP9_COMP *cpi, PLANE_TYPE type, TX_SIZE tx_size, int dry_run) { + MB_MODE_INFO *mbmi = &xd->mode_info_context->mbmi; int pt; /* near block/prev token context index */ int c = 0; int recent_energy = 0; - const BLOCKD * const b = xd->block + ib; const int eob = xd->eobs[ib]; /* one beyond last nonzero coeff */ TOKENEXTRA *t = *tp; /* store tokens starting here */ - int16_t *qcoeff_ptr = b->qcoeff; + int16_t *qcoeff_ptr = xd->qcoeff + 16 * ib; int seg_eob; - const int segment_id = xd->mode_info_context->mbmi.segment_id; + const int segment_id = mbmi->segment_id; + const BLOCK_SIZE_TYPE sb_type = mbmi->sb_type; const int *scan; vp9_coeff_count *counts; vp9_coeff_probs *probs; - const TX_TYPE tx_type = (type == PLANE_TYPE_Y_WITH_DC) ? - get_tx_type(xd, b) : DCT_DCT; - const int ref = xd->mode_info_context->mbmi.ref_frame != INTRA_FRAME; - - ENTROPY_CONTEXT *const a = (ENTROPY_CONTEXT *)xd->above_context + - vp9_block2above[tx_size][ib]; - ENTROPY_CONTEXT *const l = (ENTROPY_CONTEXT *)xd->left_context + - vp9_block2left[tx_size][ib]; - ENTROPY_CONTEXT a_ec = *a, l_ec = *l; - - ENTROPY_CONTEXT *const a1 = (ENTROPY_CONTEXT *)(&xd->above_context[1]) + - vp9_block2above[tx_size][ib]; - ENTROPY_CONTEXT *const l1 = (ENTROPY_CONTEXT *)(&xd->left_context[1]) + - vp9_block2left[tx_size][ib]; + const TX_TYPE tx_type = (sb_type == BLOCK_SIZE_MB16X16 && + type == PLANE_TYPE_Y_WITH_DC) ? 
+ get_tx_type(xd, &xd->block[ib]) : DCT_DCT; + const int ref = mbmi->ref_frame != INTRA_FRAME; + ENTROPY_CONTEXT *a, *l, *a1, *l1, *a2, *l2, *a3, *l3, a_ec, l_ec; + if (sb_type == BLOCK_SIZE_SB64X64) { + a = (ENTROPY_CONTEXT *)xd->above_context + + vp9_block2above_sb64[tx_size][ib]; + l = (ENTROPY_CONTEXT *)xd->left_context + vp9_block2left_sb64[tx_size][ib]; + a1 = a + sizeof(ENTROPY_CONTEXT_PLANES) / sizeof(ENTROPY_CONTEXT); + l1 = l + sizeof(ENTROPY_CONTEXT_PLANES) / sizeof(ENTROPY_CONTEXT); + a2 = a1 + sizeof(ENTROPY_CONTEXT_PLANES) / sizeof(ENTROPY_CONTEXT); + l2 = l1 + sizeof(ENTROPY_CONTEXT_PLANES) / sizeof(ENTROPY_CONTEXT); + a3 = a2 + sizeof(ENTROPY_CONTEXT_PLANES) / sizeof(ENTROPY_CONTEXT); + l3 = l2 + sizeof(ENTROPY_CONTEXT_PLANES) / sizeof(ENTROPY_CONTEXT); + } else if (sb_type == BLOCK_SIZE_SB32X32) { + a = (ENTROPY_CONTEXT *)xd->above_context + vp9_block2above_sb[tx_size][ib]; + l = (ENTROPY_CONTEXT *)xd->left_context + vp9_block2left_sb[tx_size][ib]; + a1 = a + sizeof(ENTROPY_CONTEXT_PLANES) / sizeof(ENTROPY_CONTEXT); + l1 = l + sizeof(ENTROPY_CONTEXT_PLANES) / sizeof(ENTROPY_CONTEXT); + a2 = a3 = l2 = l3 = NULL; + } else { + a = (ENTROPY_CONTEXT *)xd->above_context + vp9_block2above[tx_size][ib]; + l = (ENTROPY_CONTEXT *)xd->left_context + vp9_block2left[tx_size][ib]; + a1 = l1 = a2 = l2 = a3 = l3 = NULL; + } switch (tx_size) { default: case TX_4X4: + a_ec = *a; + l_ec = *l; seg_eob = 16; scan = vp9_default_zig_zag1d_4x4; if (tx_type != DCT_DCT) { @@ -164,23 +179,23 @@ static void tokenize_b(VP9_COMP *cpi, scan = vp9_default_zig_zag1d_16x16; counts = cpi->coef_counts_16x16; probs = cpi->common.fc.coef_probs_16x16; - if (type == PLANE_TYPE_UV) { - int uv_idx = (ib - 16) >> 2; - qcoeff_ptr = xd->sb_coeff_data.qcoeff + 1024 + 256 * uv_idx; - } break; case TX_32X32: - a_ec = a[0] + a[1] + a[2] + a[3] + - a1[0] + a1[1] + a1[2] + a1[3]; - l_ec = l[0] + l[1] + l[2] + l[3] + - l1[0] + l1[1] + l1[2] + l1[3]; - a_ec = a_ec != 0; - l_ec = l_ec != 0; + if (type 
!= PLANE_TYPE_UV) { + a_ec = (a[0] + a[1] + a[2] + a[3] + + a1[0] + a1[1] + a1[2] + a1[3]) != 0; + l_ec = (l[0] + l[1] + l[2] + l[3] + + l1[0] + l1[1] + l1[2] + l1[3]) != 0; + } else { + a_ec = (a[0] + a[1] + a1[0] + a1[1] + + a2[0] + a2[1] + a3[0] + a3[1]) != 0; + l_ec = (l[0] + l[1] + l1[0] + l1[1] + + l2[0] + l2[1] + l3[0] + l3[1]) != 0; + } seg_eob = 1024; scan = vp9_default_zig_zag1d_32x32; counts = cpi->coef_counts_32x32; probs = cpi->common.fc.coef_probs_32x32; - qcoeff_ptr = xd->sb_coeff_data.qcoeff; break; } @@ -233,10 +248,17 @@ static void tokenize_b(VP9_COMP *cpi, l1[0] = l1[1] = l[1] = l_ec; } } else if (tx_size == TX_32X32) { - a[1] = a[2] = a[3] = a_ec; - l[1] = l[2] = l[3] = l_ec; - a1[0] = a1[1] = a1[2] = a1[3] = a_ec; - l1[0] = l1[1] = l1[2] = l1[3] = l_ec; + if (type != PLANE_TYPE_UV) { + a[1] = a[2] = a[3] = a_ec; + l[1] = l[2] = l[3] = l_ec; + a1[0] = a1[1] = a1[2] = a1[3] = a_ec; + l1[0] = l1[1] = l1[2] = l1[3] = l_ec; + } else { + a[1] = a1[0] = a1[1] = a_ec; + l[1] = l1[0] = l1[1] = l_ec; + a2[0] = a2[1] = a3[0] = a3[1] = a_ec; + l2[0] = l2[1] = l3[0] = l3[1] = l_ec; + } } } @@ -289,9 +311,7 @@ static int mb_is_skippable_8x8_4x4uv(MACROBLOCKD *xd) { } int vp9_mby_is_skippable_16x16(MACROBLOCKD *xd) { - int skip = 1; - skip &= !xd->eobs[0]; - return skip; + return (!xd->eobs[0]); } static int mb_is_skippable_16x16(MACROBLOCKD *xd) { @@ -299,13 +319,11 @@ static int mb_is_skippable_16x16(MACROBLOCKD *xd) { } int vp9_sby_is_skippable_32x32(MACROBLOCKD *xd) { - int skip = 1; - skip &= !xd->eobs[0]; - return skip; + return (!xd->eobs[0]); } int vp9_sbuv_is_skippable_16x16(MACROBLOCKD *xd) { - return (!xd->eobs[16]) & (!xd->eobs[20]); + return (!xd->eobs[64]) & (!xd->eobs[80]); } static int sb_is_skippable_32x32(MACROBLOCKD *xd) { @@ -313,6 +331,68 @@ static int sb_is_skippable_32x32(MACROBLOCKD *xd) { vp9_sbuv_is_skippable_16x16(xd); } +static int sby_is_skippable_16x16(MACROBLOCKD *xd) { + int skip = 1; + int i = 0; + + for (i = 0; i < 64; i += 
16) + skip &= (!xd->eobs[i]); + + return skip; +} + +static int sb_is_skippable_16x16(MACROBLOCKD *xd) { + return sby_is_skippable_16x16(xd) & vp9_sbuv_is_skippable_16x16(xd); +} + +static int sby_is_skippable_8x8(MACROBLOCKD *xd) { + int skip = 1; + int i = 0; + + for (i = 0; i < 64; i += 4) + skip &= (!xd->eobs[i]); + + return skip; +} + +static int sbuv_is_skippable_8x8(MACROBLOCKD *xd) { + int skip = 1; + int i = 0; + + for (i = 64; i < 96; i += 4) + skip &= (!xd->eobs[i]); + + return skip; +} + +static int sb_is_skippable_8x8(MACROBLOCKD *xd) { + return sby_is_skippable_8x8(xd) & sbuv_is_skippable_8x8(xd); +} + +static int sby_is_skippable_4x4(MACROBLOCKD *xd) { + int skip = 1; + int i = 0; + + for (i = 0; i < 64; i++) + skip &= (!xd->eobs[i]); + + return skip; +} + +static int sbuv_is_skippable_4x4(MACROBLOCKD *xd) { + int skip = 1; + int i = 0; + + for (i = 64; i < 96; i++) + skip &= (!xd->eobs[i]); + + return skip; +} + +static int sb_is_skippable_4x4(MACROBLOCKD *xd) { + return sby_is_skippable_4x4(xd) & sbuv_is_skippable_4x4(xd); +} + void vp9_tokenize_sb(VP9_COMP *cpi, MACROBLOCKD *xd, TOKENEXTRA **t, @@ -325,7 +405,21 @@ void vp9_tokenize_sb(VP9_COMP *cpi, const int skip_inc = !vp9_segfeature_active(xd, segment_id, SEG_LVL_SKIP); int b; - mbmi->mb_skip_coeff = sb_is_skippable_32x32(xd); + switch (mbmi->txfm_size) { + case TX_32X32: + mbmi->mb_skip_coeff = sb_is_skippable_32x32(xd); + break; + case TX_16X16: + mbmi->mb_skip_coeff = sb_is_skippable_16x16(xd); + break; + case TX_8X8: + mbmi->mb_skip_coeff = sb_is_skippable_8x8(xd); + break; + case TX_4X4: + mbmi->mb_skip_coeff = sb_is_skippable_4x4(xd); + break; + default: assert(0); + } if (mbmi->mb_skip_coeff) { if (!dry_run) @@ -333,7 +427,7 @@ void vp9_tokenize_sb(VP9_COMP *cpi, if (!cm->mb_no_coeff_skip) { vp9_stuff_sb(cpi, xd, t, dry_run); } else { - vp9_fix_contexts_sb(xd); + vp9_reset_sb_tokens_context(xd); } if (dry_run) *t = t_backup; @@ -343,13 +437,215 @@ void vp9_tokenize_sb(VP9_COMP *cpi, if 
(!dry_run) cpi->skip_false_count[mb_skip_context] += skip_inc; - tokenize_b(cpi, xd, 0, t, PLANE_TYPE_Y_WITH_DC, - TX_32X32, dry_run); - - for (b = 16; b < 24; b += 4) { - tokenize_b(cpi, xd, b, t, PLANE_TYPE_UV, - TX_16X16, dry_run); + switch (mbmi->txfm_size) { + case TX_32X32: + tokenize_b(cpi, xd, 0, t, PLANE_TYPE_Y_WITH_DC, + TX_32X32, dry_run); + for (b = 64; b < 96; b += 16) + tokenize_b(cpi, xd, b, t, PLANE_TYPE_UV, + TX_16X16, dry_run); + break; + case TX_16X16: + for (b = 0; b < 64; b += 16) + tokenize_b(cpi, xd, b, t, PLANE_TYPE_Y_WITH_DC, + TX_16X16, dry_run); + for (b = 64; b < 96; b += 16) + tokenize_b(cpi, xd, b, t, PLANE_TYPE_UV, + TX_16X16, dry_run); + break; + case TX_8X8: + for (b = 0; b < 64; b += 4) + tokenize_b(cpi, xd, b, t, PLANE_TYPE_Y_WITH_DC, + TX_8X8, dry_run); + for (b = 64; b < 96; b += 4) + tokenize_b(cpi, xd, b, t, PLANE_TYPE_UV, + TX_8X8, dry_run); + break; + case TX_4X4: + for (b = 0; b < 64; b++) + tokenize_b(cpi, xd, b, t, PLANE_TYPE_Y_WITH_DC, + TX_4X4, dry_run); + for (b = 64; b < 96; b++) + tokenize_b(cpi, xd, b, t, PLANE_TYPE_UV, + TX_4X4, dry_run); + break; + default: assert(0); } + + if (dry_run) + *t = t_backup; +} + +static int sb64y_is_skippable_32x32(MACROBLOCKD *xd) { + int skip = 1; + int i = 0; + + for (i = 0; i < 256; i += 64) + skip &= (!xd->eobs[i]); + + return skip; +} + +int vp9_sb64uv_is_skippable_32x32(MACROBLOCKD *xd) { + return (!xd->eobs[256]) & (!xd->eobs[320]); +} + +static int sb64_is_skippable_32x32(MACROBLOCKD *xd) { + return sb64y_is_skippable_32x32(xd) & vp9_sb64uv_is_skippable_32x32(xd); +} + +static int sb64y_is_skippable_16x16(MACROBLOCKD *xd) { + int skip = 1; + int i = 0; + + for (i = 0; i < 256; i += 16) + skip &= (!xd->eobs[i]); + + return skip; +} + +static int sb64uv_is_skippable_16x16(MACROBLOCKD *xd) { + int skip = 1; + int i = 0; + + for (i = 256; i < 384; i += 16) + skip &= (!xd->eobs[i]); + + return skip; +} + +static int sb64_is_skippable_16x16(MACROBLOCKD *xd) { + return 
sb64y_is_skippable_16x16(xd) & sb64uv_is_skippable_16x16(xd); +} + +static int sb64y_is_skippable_8x8(MACROBLOCKD *xd) { + int skip = 1; + int i = 0; + + for (i = 0; i < 256; i += 4) + skip &= (!xd->eobs[i]); + + return skip; +} + +static int sb64uv_is_skippable_8x8(MACROBLOCKD *xd) { + int skip = 1; + int i = 0; + + for (i = 256; i < 384; i += 4) + skip &= (!xd->eobs[i]); + + return skip; +} + +static int sb64_is_skippable_8x8(MACROBLOCKD *xd) { + return sb64y_is_skippable_8x8(xd) & sb64uv_is_skippable_8x8(xd); +} + +static int sb64y_is_skippable_4x4(MACROBLOCKD *xd) { + int skip = 1; + int i = 0; + + for (i = 0; i < 256; i++) + skip &= (!xd->eobs[i]); + + return skip; +} + +static int sb64uv_is_skippable_4x4(MACROBLOCKD *xd) { + int skip = 1; + int i = 0; + + for (i = 256; i < 384; i++) + skip &= (!xd->eobs[i]); + + return skip; +} + +static int sb64_is_skippable_4x4(MACROBLOCKD *xd) { + return sb64y_is_skippable_4x4(xd) & sb64uv_is_skippable_4x4(xd); +} + +void vp9_tokenize_sb64(VP9_COMP *cpi, + MACROBLOCKD *xd, + TOKENEXTRA **t, + int dry_run) { + VP9_COMMON * const cm = &cpi->common; + MB_MODE_INFO * const mbmi = &xd->mode_info_context->mbmi; + TOKENEXTRA *t_backup = *t; + const int mb_skip_context = vp9_get_pred_context(cm, xd, PRED_MBSKIP); + const int segment_id = mbmi->segment_id; + const int skip_inc = !vp9_segfeature_active(xd, segment_id, SEG_LVL_SKIP); + int b; + + switch (mbmi->txfm_size) { + case TX_32X32: + mbmi->mb_skip_coeff = sb64_is_skippable_32x32(xd); + break; + case TX_16X16: + mbmi->mb_skip_coeff = sb64_is_skippable_16x16(xd); + break; + case TX_8X8: + mbmi->mb_skip_coeff = sb64_is_skippable_8x8(xd); + break; + case TX_4X4: + mbmi->mb_skip_coeff = sb64_is_skippable_4x4(xd); + break; + default: assert(0); + } + + if (mbmi->mb_skip_coeff) { + if (!dry_run) + cpi->skip_true_count[mb_skip_context] += skip_inc; + if (!cm->mb_no_coeff_skip) { + vp9_stuff_sb64(cpi, xd, t, dry_run); + } else { + vp9_reset_sb64_tokens_context(xd); + } + if (dry_run) 
+ *t = t_backup; + return; + } + + if (!dry_run) + cpi->skip_false_count[mb_skip_context] += skip_inc; + + switch (mbmi->txfm_size) { + case TX_32X32: + for (b = 0; b < 256; b += 64) + tokenize_b(cpi, xd, b, t, PLANE_TYPE_Y_WITH_DC, + TX_32X32, dry_run); + for (b = 256; b < 384; b += 64) + tokenize_b(cpi, xd, b, t, PLANE_TYPE_UV, + TX_32X32, dry_run); + break; + case TX_16X16: + for (b = 0; b < 256; b += 16) + tokenize_b(cpi, xd, b, t, PLANE_TYPE_Y_WITH_DC, + TX_16X16, dry_run); + for (b = 256; b < 384; b += 16) + tokenize_b(cpi, xd, b, t, PLANE_TYPE_UV, + TX_16X16, dry_run); + break; + case TX_8X8: + for (b = 0; b < 256; b += 4) + tokenize_b(cpi, xd, b, t, PLANE_TYPE_Y_WITH_DC, + TX_8X8, dry_run); + for (b = 256; b < 384; b += 4) + tokenize_b(cpi, xd, b, t, PLANE_TYPE_UV, + TX_8X8, dry_run); + break; + case TX_4X4: + for (b = 0; b < 256; b++) + tokenize_b(cpi, xd, b, t, PLANE_TYPE_Y_WITH_DC, + TX_4X4, dry_run); + for (b = 256; b < 384; b++) + tokenize_b(cpi, xd, b, t, PLANE_TYPE_UV, + TX_4X4, dry_run); + break; + default: assert(0); + } + if (dry_run) *t = t_backup; } @@ -567,23 +863,23 @@ void print_context_counters() { /* print counts */ print_counter(f, context_counters_4x4, BLOCK_TYPES, - "vp9_default_coef_counts_4x4[BLOCK_TYPES_4X4]"); + "vp9_default_coef_counts_4x4[BLOCK_TYPES]"); print_counter(f, context_counters_8x8, BLOCK_TYPES, - "vp9_default_coef_counts_8x8[BLOCK_TYPES_8X8]"); + "vp9_default_coef_counts_8x8[BLOCK_TYPES]"); print_counter(f, context_counters_16x16, BLOCK_TYPES, - "vp9_default_coef_counts_16x16[BLOCK_TYPES_16X16]"); - print_counter(f, context_counters_32x32, BLOCK_TYPES_32X32, - "vp9_default_coef_counts_32x32[BLOCK_TYPES_32X32]"); + "vp9_default_coef_counts_16x16[BLOCK_TYPES]"); + print_counter(f, context_counters_32x32, BLOCK_TYPES, + "vp9_default_coef_counts_32x32[BLOCK_TYPES]"); /* print coefficient probabilities */ print_probs(f, context_counters_4x4, BLOCK_TYPES, - "default_coef_probs_4x4[BLOCK_TYPES_4X4]"); + 
"default_coef_probs_4x4[BLOCK_TYPES]"); print_probs(f, context_counters_8x8, BLOCK_TYPES, - "default_coef_probs_8x8[BLOCK_TYPES_8X8]"); + "default_coef_probs_8x8[BLOCK_TYPES]"); print_probs(f, context_counters_16x16, BLOCK_TYPES, - "default_coef_probs_16x16[BLOCK_TYPES_16X16]"); - print_probs(f, context_counters_32x32, BLOCK_TYPES_32X32, - "default_coef_probs_32x32[BLOCK_TYPES_32X32]"); + "default_coef_probs_16x16[BLOCK_TYPES]"); + print_probs(f, context_counters_32x32, BLOCK_TYPES, + "default_coef_probs_32x32[BLOCK_TYPES]"); fclose(f); @@ -600,31 +896,49 @@ void vp9_tokenize_initialize() { fill_value_tokens(); } -static INLINE void stuff_b(VP9_COMP *cpi, - MACROBLOCKD *xd, - const int ib, - TOKENEXTRA **tp, - PLANE_TYPE type, - TX_SIZE tx_size, - int dry_run) { +static void stuff_b(VP9_COMP *cpi, + MACROBLOCKD *xd, + const int ib, + TOKENEXTRA **tp, + PLANE_TYPE type, + TX_SIZE tx_size, + int dry_run) { vp9_coeff_count *counts; vp9_coeff_probs *probs; int pt, band; TOKENEXTRA *t = *tp; - const int ref = xd->mode_info_context->mbmi.ref_frame != INTRA_FRAME; - ENTROPY_CONTEXT *const a = (ENTROPY_CONTEXT *)xd->above_context + - vp9_block2above[tx_size][ib]; - ENTROPY_CONTEXT *const l = (ENTROPY_CONTEXT *)xd->left_context + - vp9_block2left[tx_size][ib]; - ENTROPY_CONTEXT a_ec = *a, l_ec = *l; - ENTROPY_CONTEXT *const a1 = (ENTROPY_CONTEXT *)(&xd->above_context[1]) + - vp9_block2above[tx_size][ib]; - ENTROPY_CONTEXT *const l1 = (ENTROPY_CONTEXT *)(&xd->left_context[1]) + - vp9_block2left[tx_size][ib]; + MB_MODE_INFO *mbmi = &xd->mode_info_context->mbmi; + const int ref = mbmi->ref_frame != INTRA_FRAME; + const BLOCK_SIZE_TYPE sb_type = mbmi->sb_type; + ENTROPY_CONTEXT *a, *l, *a1, *l1, *a2, *l2, *a3, *l3, a_ec, l_ec; + + if (sb_type == BLOCK_SIZE_SB64X64) { + a = (ENTROPY_CONTEXT *)xd->above_context + + vp9_block2above_sb64[tx_size][ib]; + l = (ENTROPY_CONTEXT *)xd->left_context + vp9_block2left_sb64[tx_size][ib]; + a1 = a + sizeof(ENTROPY_CONTEXT_PLANES) / 
sizeof(ENTROPY_CONTEXT); + l1 = l + sizeof(ENTROPY_CONTEXT_PLANES) / sizeof(ENTROPY_CONTEXT); + a2 = a1 + sizeof(ENTROPY_CONTEXT_PLANES) / sizeof(ENTROPY_CONTEXT); + l2 = l1 + sizeof(ENTROPY_CONTEXT_PLANES) / sizeof(ENTROPY_CONTEXT); + a3 = a2 + sizeof(ENTROPY_CONTEXT_PLANES) / sizeof(ENTROPY_CONTEXT); + l3 = l2 + sizeof(ENTROPY_CONTEXT_PLANES) / sizeof(ENTROPY_CONTEXT); + } else if (sb_type == BLOCK_SIZE_SB32X32) { + a = (ENTROPY_CONTEXT *)xd->above_context + vp9_block2above_sb[tx_size][ib]; + l = (ENTROPY_CONTEXT *)xd->left_context + vp9_block2left_sb[tx_size][ib]; + a1 = a + sizeof(ENTROPY_CONTEXT_PLANES) / sizeof(ENTROPY_CONTEXT); + l1 = l + sizeof(ENTROPY_CONTEXT_PLANES) / sizeof(ENTROPY_CONTEXT); + a2 = l2 = a3 = l3 = NULL; + } else { + a = (ENTROPY_CONTEXT *)xd->above_context + vp9_block2above[tx_size][ib]; + l = (ENTROPY_CONTEXT *)xd->left_context + vp9_block2left[tx_size][ib]; + a1 = l1 = a2 = l2 = a3 = l3 = NULL; + } switch (tx_size) { default: case TX_4X4: + a_ec = a[0]; + l_ec = l[0]; counts = cpi->coef_counts_4x4; probs = cpi->common.fc.coef_probs_4x4; break; @@ -646,12 +960,17 @@ static INLINE void stuff_b(VP9_COMP *cpi, probs = cpi->common.fc.coef_probs_16x16; break; case TX_32X32: - a_ec = a[0] + a[1] + a[2] + a[3] + - a1[0] + a1[1] + a1[2] + a1[3]; - l_ec = l[0] + l[1] + l[2] + l[3] + - l1[0] + l1[1] + l1[2] + l1[3]; - a_ec = a_ec != 0; - l_ec = l_ec != 0; + if (type != PLANE_TYPE_UV) { + a_ec = (a[0] + a[1] + a[2] + a[3] + + a1[0] + a1[1] + a1[2] + a1[3]) != 0; + l_ec = (l[0] + l[1] + l[2] + l[3] + + l1[0] + l1[1] + l1[2] + l1[3]) != 0; + } else { + a_ec = (a[0] + a[1] + a1[0] + a1[1] + + a2[0] + a2[1] + a3[0] + a3[1]) != 0; + l_ec = (l[0] + l[1] + l1[0] + l1[1] + + l2[0] + l2[1] + l3[0] + l3[1]) != 0; + } counts = cpi->coef_counts_32x32; probs = cpi->common.fc.coef_probs_32x32; break; @@ -678,10 +997,17 @@ static INLINE void stuff_b(VP9_COMP *cpi, l1[0] = l1[1] = l[1] = l_ec; } } else if (tx_size == TX_32X32) { - a[1] = a[2] = a[3] = a_ec; - l[1] 
= l[2] = l[3] = l_ec; - a1[0] = a1[1] = a1[2] = a1[3] = a_ec; - l1[0] = l1[1] = l1[2] = l1[3] = l_ec; + if (type != PLANE_TYPE_UV) { + a[1] = a[2] = a[3] = a_ec; + l[1] = l[2] = l[3] = l_ec; + a1[0] = a1[1] = a1[2] = a1[3] = a_ec; + l1[0] = l1[1] = l1[2] = l1[3] = l_ec; + } else { + a[1] = a1[0] = a1[1] = a_ec; + l[1] = l1[0] = l1[1] = l_ec; + a2[0] = a2[1] = a3[0] = a3[1] = a_ec; + l2[0] = l2[1] = l3[0] = l3[1] = l_ec; + } } if (!dry_run) { @@ -751,27 +1077,76 @@ void vp9_stuff_mb(VP9_COMP *cpi, MACROBLOCKD *xd, TOKENEXTRA **t, int dry_run) { } } -static void stuff_sb_32x32(VP9_COMP *cpi, MACROBLOCKD *xd, - TOKENEXTRA **t, int dry_run) { - int b; - - stuff_b(cpi, xd, 0, t, PLANE_TYPE_Y_WITH_DC, TX_32X32, dry_run); - for (b = 16; b < 24; b += 4) { - stuff_b(cpi, xd, b, t, PLANE_TYPE_UV, TX_16X16, dry_run); - } -} - void vp9_stuff_sb(VP9_COMP *cpi, MACROBLOCKD *xd, TOKENEXTRA **t, int dry_run) { TOKENEXTRA * const t_backup = *t; + int b; - stuff_sb_32x32(cpi, xd, t, dry_run); + switch (xd->mode_info_context->mbmi.txfm_size) { + case TX_32X32: + stuff_b(cpi, xd, 0, t, PLANE_TYPE_Y_WITH_DC, TX_32X32, dry_run); + for (b = 64; b < 96; b += 16) + stuff_b(cpi, xd, b, t, PLANE_TYPE_UV, TX_16X16, dry_run); + break; + case TX_16X16: + for (b = 0; b < 64; b += 16) + stuff_b(cpi, xd, b, t, PLANE_TYPE_Y_WITH_DC, TX_16X16, dry_run); + for (b = 64; b < 96; b += 16) + stuff_b(cpi, xd, b, t, PLANE_TYPE_UV, TX_16X16, dry_run); + break; + case TX_8X8: + for (b = 0; b < 64; b += 4) + stuff_b(cpi, xd, b, t, PLANE_TYPE_Y_WITH_DC, TX_8X8, dry_run); + for (b = 64; b < 96; b += 4) + stuff_b(cpi, xd, b, t, PLANE_TYPE_UV, TX_8X8, dry_run); + break; + case TX_4X4: + for (b = 0; b < 64; b++) + stuff_b(cpi, xd, b, t, PLANE_TYPE_Y_WITH_DC, TX_4X4, dry_run); + for (b = 64; b < 96; b++) + stuff_b(cpi, xd, b, t, PLANE_TYPE_UV, TX_4X4, dry_run); + break; + default: assert(0); + } if (dry_run) { *t = t_backup; } } -void vp9_fix_contexts_sb(MACROBLOCKD *xd) { - vpx_memset(xd->above_context, 0, 
sizeof(ENTROPY_CONTEXT_PLANES) * 2); - vpx_memset(xd->left_context, 0, sizeof(ENTROPY_CONTEXT_PLANES) * 2); +void vp9_stuff_sb64(VP9_COMP *cpi, MACROBLOCKD *xd, + TOKENEXTRA **t, int dry_run) { + TOKENEXTRA * const t_backup = *t; + int b; + + switch (xd->mode_info_context->mbmi.txfm_size) { + case TX_32X32: + for (b = 0; b < 256; b += 64) + stuff_b(cpi, xd, b, t, PLANE_TYPE_Y_WITH_DC, TX_32X32, dry_run); + for (b = 256; b < 384; b += 64) + stuff_b(cpi, xd, b, t, PLANE_TYPE_UV, TX_32X32, dry_run); + break; + case TX_16X16: + for (b = 0; b < 256; b += 16) + stuff_b(cpi, xd, b, t, PLANE_TYPE_Y_WITH_DC, TX_16X16, dry_run); + for (b = 256; b < 384; b += 16) + stuff_b(cpi, xd, b, t, PLANE_TYPE_UV, TX_16X16, dry_run); + break; + case TX_8X8: + for (b = 0; b < 256; b += 4) + stuff_b(cpi, xd, b, t, PLANE_TYPE_Y_WITH_DC, TX_8X8, dry_run); + for (b = 256; b < 384; b += 4) + stuff_b(cpi, xd, b, t, PLANE_TYPE_UV, TX_8X8, dry_run); + break; + case TX_4X4: + for (b = 0; b < 256; b++) + stuff_b(cpi, xd, b, t, PLANE_TYPE_Y_WITH_DC, TX_4X4, dry_run); + for (b = 256; b < 384; b++) + stuff_b(cpi, xd, b, t, PLANE_TYPE_UV, TX_4X4, dry_run); + break; + default: assert(0); + } + + if (dry_run) { + *t = t_backup; + } } diff --git a/vp9/encoder/vp9_tokenize.h b/vp9/encoder/vp9_tokenize.h index 6ac19ba71..4d6fe6343 100644 --- a/vp9/encoder/vp9_tokenize.h +++ b/vp9/encoder/vp9_tokenize.h @@ -38,6 +38,7 @@ int vp9_mbuv_is_skippable_8x8(MACROBLOCKD *xd); int vp9_mby_is_skippable_16x16(MACROBLOCKD *xd); int vp9_sby_is_skippable_32x32(MACROBLOCKD *xd); int vp9_sbuv_is_skippable_16x16(MACROBLOCKD *xd); +int vp9_sb64uv_is_skippable_32x32(MACROBLOCKD *xd); struct VP9_COMP; @@ -45,13 +46,15 @@ void vp9_tokenize_mb(struct VP9_COMP *cpi, MACROBLOCKD *xd, TOKENEXTRA **t, int dry_run); void vp9_tokenize_sb(struct VP9_COMP *cpi, MACROBLOCKD *xd, TOKENEXTRA **t, int dry_run); +void vp9_tokenize_sb64(struct VP9_COMP *cpi, MACROBLOCKD *xd, + TOKENEXTRA **t, int dry_run); void vp9_stuff_mb(struct VP9_COMP 
*cpi, MACROBLOCKD *xd, TOKENEXTRA **t, int dry_run); void vp9_stuff_sb(struct VP9_COMP *cpi, MACROBLOCKD *xd, TOKENEXTRA **t, int dry_run); - -void vp9_fix_contexts_sb(MACROBLOCKD *xd); +void vp9_stuff_sb64(struct VP9_COMP *cpi, MACROBLOCKD *xd, + TOKENEXTRA **t, int dry_run); #ifdef ENTROPY_STATS void init_context_counters(); @@ -60,7 +63,7 @@ void print_context_counters(); extern vp9_coeff_accum context_counters_4x4[BLOCK_TYPES]; extern vp9_coeff_accum context_counters_8x8[BLOCK_TYPES]; extern vp9_coeff_accum context_counters_16x16[BLOCK_TYPES]; -extern vp9_coeff_accum context_counters_32x32[BLOCK_TYPES_32X32]; +extern vp9_coeff_accum context_counters_32x32[BLOCK_TYPES]; #endif extern const int *vp9_dct_value_cost_ptr;