From 111ca4213355ac4edd10b3c14461096d56e3f3d0 Mon Sep 17 00:00:00 2001 From: "Ronald S. Bultje" Date: Mon, 4 Mar 2013 14:12:17 -0800 Subject: [PATCH] Make superblocks independent of macroblock code and data. Split macroblock and superblock tokenization and detokenization functions and coefficient-related data structs so that the bitstream layout and related code of superblock coefficients looks less like it's a hack to fit macroblocks in superblocks. In addition, unify chroma transform size selection from luma transform size (i.e. always use the same size, as long as it fits the predictor); in practice, this means 32x32 and 64x64 superblocks using the 16x16 luma transform will now use the 16x16 (instead of the 8x8) chroma transform, and 64x64 superblocks using the 32x32 luma transform will now use the 32x32 (instead of the 16x16) chroma transform. Lastly, add a trellis optimize function for 32x32 transform blocks. HD gains about 0.3%, STDHD about 0.15% and derf about 0.1%. There's a few negative points here and there that I might want to analyze a little closer. 
Change-Id: Ibad7c3ddfe1acfc52771dfc27c03e9783e054430 --- vp9/common/vp9_blockd.c | 436 +++++++++++++++++++- vp9/common/vp9_blockd.h | 27 +- vp9/common/vp9_default_coef_probs.h | 350 ++++++++++------ vp9/common/vp9_entropy.c | 2 +- vp9/common/vp9_entropy.h | 15 +- vp9/common/vp9_idctllm.c | 22 +- vp9/common/vp9_invtrans.c | 188 ++++++++- vp9/common/vp9_invtrans.h | 22 +- vp9/common/vp9_onyxc_int.h | 6 +- vp9/common/vp9_recon.c | 36 +- vp9/common/vp9_rtcd_defs.sh | 6 + vp9/decoder/vp9_decodframe.c | 424 ++++++++----------- vp9/decoder/vp9_dequantize.c | 4 +- vp9/decoder/vp9_detokenize.c | 207 +++++++++- vp9/decoder/vp9_detokenize.h | 6 +- vp9/encoder/vp9_bitstream.c | 16 +- vp9/encoder/vp9_block.h | 11 +- vp9/encoder/vp9_encodeframe.c | 472 ++++++++------------- vp9/encoder/vp9_encodeintra.c | 15 +- vp9/encoder/vp9_encodemb.c | 608 +++++++++++++++++++++++++--- vp9/encoder/vp9_encodemb.h | 36 +- vp9/encoder/vp9_onyx_int.h | 8 +- vp9/encoder/vp9_quantize.c | 296 +++++++++++++- vp9/encoder/vp9_quantize.h | 14 + vp9/encoder/vp9_rdopt.c | 167 +++++--- vp9/encoder/vp9_tokenize.c | 575 +++++++++++++++++++++----- vp9/encoder/vp9_tokenize.h | 9 +- 27 files changed, 2959 insertions(+), 1019 deletions(-) diff --git a/vp9/common/vp9_blockd.c b/vp9/common/vp9_blockd.c index 1eda3cc38..9151622d3 100644 --- a/vp9/common/vp9_blockd.c +++ b/vp9/common/vp9_blockd.c @@ -12,15 +12,431 @@ #include "vp9/common/vp9_blockd.h" #include "vpx_mem/vpx_mem.h" -const uint8_t vp9_block2left[TX_SIZE_MAX_SB][24] = { - {0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7}, - {0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 2, 2, 2, 2, 2, 2, 4, 4, 4, 4, 6, 6, 6, 6}, - {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 4, 4, 4, 6, 6, 6, 6}, - {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 4, 4, 4, 6, 6, 6, 6} +const uint8_t vp9_block2left[TX_SIZE_MAX_MB][24] = { + { 0, 0, 0, 0, + 1, 1, 1, 1, + 2, 2, 2, 2, + 3, 3, 3, 3, + 4, 4, + 5, 5, + 6, 6, + 7, 7 }, + { 0, 0, 0, 0, + 0, 0, 0, 0, + 2, 2, 2, 2, 
+ 2, 2, 2, 2, + 4, 4, + 4, 4, + 6, 6, + 6, 6 }, + { 0, 0, 0, 0, + 0, 0, 0, 0, + 0, 0, 0, 0, + 0, 0, 0, 0 }, }; -const uint8_t vp9_block2above[TX_SIZE_MAX_SB][24] = { - {0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 4, 5, 4, 5, 6, 7, 6, 7}, - {0, 0, 0, 0, 2, 2, 2, 2, 0, 0, 0, 0, 2, 2, 2, 2, 4, 4, 4, 4, 6, 6, 6, 6}, - {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 4, 4, 4, 6, 6, 6, 6}, - {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 4, 4, 4, 6, 6, 6, 6} +const uint8_t vp9_block2above[TX_SIZE_MAX_MB][24] = { + { 0, 1, 2, 3, + 0, 1, 2, 3, + 0, 1, 2, 3, + 0, 1, 2, 3, + 4, 5, + 4, 5, + 6, 7, + 6, 7 }, + { 0, 0, 0, 0, + 2, 2, 2, 2, + 0, 0, 0, 0, + 2, 2, 2, 2, + 4, 4, + 4, 4, + 6, 6, + 6, 6 }, + { 0, 0, 0, 0, + 0, 0, 0, 0, + 0, 0, 0, 0, + 0, 0, 0, 0 }, }; + +#define S(x) x + sizeof(ENTROPY_CONTEXT_PLANES) / sizeof(ENTROPY_CONTEXT) +const uint8_t vp9_block2left_sb[TX_SIZE_MAX_SB][96] = { + { 0, 0, 0, 0, 0, 0, 0, 0, + 1, 1, 1, 1, 1, 1, 1, 1, + 2, 2, 2, 2, 2, 2, 2, 2, + 3, 3, 3, 3, 3, 3, 3, 3, + S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), + S(1), S(1), S(1), S(1), S(1), S(1), S(1), S(1), + S(2), S(2), S(2), S(2), S(2), S(2), S(2), S(2), + S(3), S(3), S(3), S(3), S(3), S(3), S(3), S(3), + 4, 4, 4, 4, + 5, 5, 5, 5, + S(4), S(4), S(4), S(4), + S(5), S(5), S(5), S(5), + 6, 6, 6, 6, + 7, 7, 7, 7, + S(6), S(6), S(6), S(6), + S(7), S(7), S(7), S(7) }, + { 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, + S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), + S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), + S(2), S(2), S(2), S(2), S(2), S(2), S(2), S(2), + S(2), S(2), S(2), S(2), S(2), S(2), S(2), S(2), + 4, 4, 4, 4, + 4, 4, 4, 4, + S(4), S(4), S(4), S(4), + S(4), S(4), S(4), S(4), + 6, 6, 6, 6, + 6, 6, 6, 6, + S(6), S(6), S(6), S(6), + S(6), S(6), S(6), S(6) }, + { 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), + 
S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), + S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), + S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), + 4, 4, 4, 4, + 4, 4, 4, 4, + 4, 4, 4, 4, + 4, 4, 4, 4, + 6, 6, 6, 6, + 6, 6, 6, 6, + 6, 6, 6, 6, + 6, 6, 6, 6 }, + { 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0 }, +}; +const uint8_t vp9_block2above_sb[TX_SIZE_MAX_SB][96] = { + { 0, 1, 2, 3, S(0), S(1), S(2), S(3), + 0, 1, 2, 3, S(0), S(1), S(2), S(3), + 0, 1, 2, 3, S(0), S(1), S(2), S(3), + 0, 1, 2, 3, S(0), S(1), S(2), S(3), + 0, 1, 2, 3, S(0), S(1), S(2), S(3), + 0, 1, 2, 3, S(0), S(1), S(2), S(3), + 0, 1, 2, 3, S(0), S(1), S(2), S(3), + 0, 1, 2, 3, S(0), S(1), S(2), S(3), + 4, 5, S(4), S(5), + 4, 5, S(4), S(5), + 4, 5, S(4), S(5), + 4, 5, S(4), S(5), + 6, 7, S(6), S(7), + 6, 7, S(6), S(7), + 6, 7, S(6), S(7), + 6, 7, S(6), S(7) }, + { 0, 0, 0, 0, 2, 2, 2, 2, + S(0), S(0), S(0), S(0), S(2), S(2), S(2), S(2), + 0, 0, 0, 0, 2, 2, 2, 2, + S(0), S(0), S(0), S(0), S(2), S(2), S(2), S(2), + 0, 0, 0, 0, 2, 2, 2, 2, + S(0), S(0), S(0), S(0), S(2), S(2), S(2), S(2), + 0, 0, 0, 0, 2, 2, 2, 2, + S(0), S(0), S(0), S(0), S(2), S(2), S(2), S(2), + 4, 4, 4, 4, + S(4), S(4), S(4), S(4), + 4, 4, 4, 4, + S(4), S(4), S(4), S(4), + 6, 6, 6, 6, + S(6), S(6), S(6), S(6), + 6, 6, 6, 6, + S(6), S(6), S(6), S(6) }, + { 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), + S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), + S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), + 4, 4, 4, 4, + 4, 4, 4, 4, + 4, 4, 4, 4, + 4, 4, 4, 4, + 6, 6, 6, 6, + 6, 6, 6, 6, + 6, 6, 6, 6, + 6, 6, 6, 6 }, + { 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 
0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0 }, +}; + +#define T(x) x + 2 * (sizeof(ENTROPY_CONTEXT_PLANES) / sizeof(ENTROPY_CONTEXT)) +#define U(x) x + 3 * (sizeof(ENTROPY_CONTEXT_PLANES) / sizeof(ENTROPY_CONTEXT)) +const uint8_t vp9_block2left_sb64[TX_SIZE_MAX_SB][384] = { + { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, + S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), + S(1), S(1), S(1), S(1), S(1), S(1), S(1), S(1), S(1), S(1), S(1), S(1), S(1), S(1), S(1), S(1), + S(2), S(2), S(2), S(2), S(2), S(2), S(2), S(2), S(2), S(2), S(2), S(2), S(2), S(2), S(2), S(2), + S(3), S(3), S(3), S(3), S(3), S(3), S(3), S(3), S(3), S(3), S(3), S(3), S(3), S(3), S(3), S(3), + T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), + T(1), T(1), T(1), T(1), T(1), T(1), T(1), T(1), T(1), T(1), T(1), T(1), T(1), T(1), T(1), T(1), + T(2), T(2), T(2), T(2), T(2), T(2), T(2), T(2), T(2), T(2), T(2), T(2), T(2), T(2), T(2), T(2), + T(3), T(3), T(3), T(3), T(3), T(3), T(3), T(3), T(3), T(3), T(3), T(3), T(3), T(3), T(3), T(3), + U(0), U(0), U(0), U(0), U(0), U(0), U(0), U(0), U(0), U(0), U(0), U(0), U(0), U(0), U(0), U(0), + U(1), U(1), U(1), U(1), U(1), U(1), U(1), U(1), U(1), U(1), U(1), U(1), U(1), U(1), U(1), U(1), + U(2), U(2), U(2), U(2), U(2), U(2), U(2), U(2), U(2), U(2), U(2), U(2), U(2), U(2), U(2), U(2), + U(3), U(3), U(3), U(3), U(3), U(3), U(3), U(3), U(3), U(3), U(3), U(3), U(3), U(3), U(3), U(3), + 4, 4, 4, 4, 4, 4, 4, 4, + 5, 5, 5, 5, 5, 5, 5, 5, + S(4), S(4), S(4), S(4), S(4), S(4), S(4), S(4), + S(5), S(5), S(5), S(5), S(5), S(5), S(5), S(5), + T(4), T(4), T(4), T(4), T(4), T(4), T(4), T(4), + T(5), T(5), T(5), T(5), T(5), T(5), T(5), T(5), + U(4), U(4), U(4), U(4), U(4), U(4), 
U(4), U(4), + U(5), U(5), U(5), U(5), U(5), U(5), U(5), U(5), + 6, 6, 6, 6, 6, 6, 6, 6, + 7, 7, 7, 7, 7, 7, 7, 7, + S(6), S(6), S(6), S(6), S(6), S(6), S(6), S(6), + S(7), S(7), S(7), S(7), S(7), S(7), S(7), S(7), + T(6), T(6), T(6), T(6), T(6), T(6), T(6), T(6), + T(7), T(7), T(7), T(7), T(7), T(7), T(7), T(7), + U(6), U(6), U(6), U(6), U(6), U(6), U(6), U(6), + U(7), U(7), U(7), U(7), U(7), U(7), U(7), U(7) }, + { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), + S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), + S(2), S(2), S(2), S(2), S(2), S(2), S(2), S(2), S(2), S(2), S(2), S(2), S(2), S(2), S(2), S(2), + S(2), S(2), S(2), S(2), S(2), S(2), S(2), S(2), S(2), S(2), S(2), S(2), S(2), S(2), S(2), S(2), + T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), + T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), + T(2), T(2), T(2), T(2), T(2), T(2), T(2), T(2), T(2), T(2), T(2), T(2), T(2), T(2), T(2), T(2), + T(2), T(2), T(2), T(2), T(2), T(2), T(2), T(2), T(2), T(2), T(2), T(2), T(2), T(2), T(2), T(2), + U(0), U(0), U(0), U(0), U(0), U(0), U(0), U(0), U(0), U(0), U(0), U(0), U(0), U(0), U(0), U(0), + U(0), U(0), U(0), U(0), U(0), U(0), U(0), U(0), U(0), U(0), U(0), U(0), U(0), U(0), U(0), U(0), + U(2), U(2), U(2), U(2), U(2), U(2), U(2), U(2), U(2), U(2), U(2), U(2), U(2), U(2), U(2), U(2), + U(2), U(2), U(2), U(2), U(2), U(2), U(2), U(2), U(2), U(2), U(2), U(2), U(2), U(2), U(2), U(2), + 4, 4, 4, 4, 4, 4, 4, 4, + 4, 4, 4, 4, 4, 4, 4, 4, + S(4), S(4), S(4), S(4), S(4), S(4), S(4), S(4), + S(4), S(4), S(4), S(4), S(4), S(4), S(4), S(4), + T(4), T(4), T(4), T(4), T(4), T(4), T(4), T(4), + 
T(4), T(4), T(4), T(4), T(4), T(4), T(4), T(4), + U(4), U(4), U(4), U(4), U(4), U(4), U(4), U(4), + U(4), U(4), U(4), U(4), U(4), U(4), U(4), U(4), + 6, 6, 6, 6, 6, 6, 6, 6, + 6, 6, 6, 6, 6, 6, 6, 6, + S(6), S(6), S(6), S(6), S(6), S(6), S(6), S(6), + S(6), S(6), S(6), S(6), S(6), S(6), S(6), S(6), + T(6), T(6), T(6), T(6), T(6), T(6), T(6), T(6), + T(6), T(6), T(6), T(6), T(6), T(6), T(6), T(6), + U(6), U(6), U(6), U(6), U(6), U(6), U(6), U(6), + U(6), U(6), U(6), U(6), U(6), U(6), U(6), U(6) }, + { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), + S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), + S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), + S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), + T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), + T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), + T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), + T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), + U(0), U(0), U(0), U(0), U(0), U(0), U(0), U(0), U(0), U(0), U(0), U(0), U(0), U(0), U(0), U(0), + U(0), U(0), U(0), U(0), U(0), U(0), U(0), U(0), U(0), U(0), U(0), U(0), U(0), U(0), U(0), U(0), + U(0), U(0), U(0), U(0), U(0), U(0), U(0), U(0), U(0), U(0), U(0), U(0), U(0), U(0), U(0), U(0), + U(0), U(0), U(0), U(0), U(0), U(0), U(0), U(0), U(0), U(0), U(0), U(0), U(0), U(0), U(0), U(0), + 4, 4, 4, 4, 4, 4, 4, 4, + 4, 4, 4, 4, 4, 4, 4, 4, + 4, 4, 4, 4, 4, 4, 4, 4, + 4, 4, 4, 4, 4, 4, 4, 4, + T(4), T(4), 
T(4), T(4), T(4), T(4), T(4), T(4), + T(4), T(4), T(4), T(4), T(4), T(4), T(4), T(4), + T(4), T(4), T(4), T(4), T(4), T(4), T(4), T(4), + T(4), T(4), T(4), T(4), T(4), T(4), T(4), T(4), + 6, 6, 6, 6, 6, 6, 6, 6, + 6, 6, 6, 6, 6, 6, 6, 6, + 6, 6, 6, 6, 6, 6, 6, 6, + 6, 6, 6, 6, 6, 6, 6, 6, + T(6), T(6), T(6), T(6), T(6), T(6), T(6), T(6), + T(6), T(6), T(6), T(6), T(6), T(6), T(6), T(6), + T(6), T(6), T(6), T(6), T(6), T(6), T(6), T(6), + T(6), T(6), T(6), T(6), T(6), T(6), T(6), T(6) }, + { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 4, 4, 4, 4, 4, 4, 4, 4, + 4, 4, 4, 4, 4, 4, 4, 4, + 4, 4, 4, 4, 4, 4, 4, 4, + 4, 4, 4, 4, 4, 4, 4, 4, + 4, 4, 4, 4, 4, 4, 4, 4, + 4, 4, 4, 4, 4, 4, 4, 4, + 4, 4, 4, 4, 4, 4, 4, 4, + 4, 4, 4, 4, 4, 4, 4, 4, + 6, 6, 6, 6, 6, 6, 6, 6, + 6, 6, 6, 6, 6, 6, 6, 6, + 6, 6, 6, 6, 6, 6, 6, 6, + 6, 6, 6, 6, 6, 6, 6, 6, + 6, 6, 6, 6, 6, 6, 6, 6, + 6, 6, 6, 6, 6, 6, 6, 6, + 6, 6, 6, 6, 6, 6, 6, 6, + 6, 6, 6, 6, 6, 6, 6, 6 }, +}; +const uint8_t vp9_block2above_sb64[TX_SIZE_MAX_SB][384] = { + { 0, 1, 2, 3, S(0), S(1), S(2), S(3), T(0), T(1), T(2), T(3), U(0), U(1), U(2), U(3), + 0, 1, 2, 3, S(0), S(1), S(2), S(3), T(0), T(1), T(2), T(3), U(0), U(1), U(2), U(3), + 0, 1, 2, 3, S(0), S(1), S(2), S(3), T(0), 
T(1), T(2), T(3), U(0), U(1), U(2), U(3), + 0, 1, 2, 3, S(0), S(1), S(2), S(3), T(0), T(1), T(2), T(3), U(0), U(1), U(2), U(3), + 0, 1, 2, 3, S(0), S(1), S(2), S(3), T(0), T(1), T(2), T(3), U(0), U(1), U(2), U(3), + 0, 1, 2, 3, S(0), S(1), S(2), S(3), T(0), T(1), T(2), T(3), U(0), U(1), U(2), U(3), + 0, 1, 2, 3, S(0), S(1), S(2), S(3), T(0), T(1), T(2), T(3), U(0), U(1), U(2), U(3), + 0, 1, 2, 3, S(0), S(1), S(2), S(3), T(0), T(1), T(2), T(3), U(0), U(1), U(2), U(3), + 0, 1, 2, 3, S(0), S(1), S(2), S(3), T(0), T(1), T(2), T(3), U(0), U(1), U(2), U(3), + 0, 1, 2, 3, S(0), S(1), S(2), S(3), T(0), T(1), T(2), T(3), U(0), U(1), U(2), U(3), + 0, 1, 2, 3, S(0), S(1), S(2), S(3), T(0), T(1), T(2), T(3), U(0), U(1), U(2), U(3), + 0, 1, 2, 3, S(0), S(1), S(2), S(3), T(0), T(1), T(2), T(3), U(0), U(1), U(2), U(3), + 0, 1, 2, 3, S(0), S(1), S(2), S(3), T(0), T(1), T(2), T(3), U(0), U(1), U(2), U(3), + 0, 1, 2, 3, S(0), S(1), S(2), S(3), T(0), T(1), T(2), T(3), U(0), U(1), U(2), U(3), + 0, 1, 2, 3, S(0), S(1), S(2), S(3), T(0), T(1), T(2), T(3), U(0), U(1), U(2), U(3), + 0, 1, 2, 3, S(0), S(1), S(2), S(3), T(0), T(1), T(2), T(3), U(0), U(1), U(2), U(3), + 4, 5, S(4), S(5), T(4), T(5), U(4), U(5), + 4, 5, S(4), S(5), T(4), T(5), U(4), U(5), + 4, 5, S(4), S(5), T(4), T(5), U(4), U(5), + 4, 5, S(4), S(5), T(4), T(5), U(4), U(5), + 4, 5, S(4), S(5), T(4), T(5), U(4), U(5), + 4, 5, S(4), S(5), T(4), T(5), U(4), U(5), + 4, 5, S(4), S(5), T(4), T(5), U(4), U(5), + 4, 5, S(4), S(5), T(4), T(5), U(4), U(5), + 6, 7, S(6), S(7), T(6), T(7), U(6), U(7), + 6, 7, S(6), S(7), T(6), T(7), U(6), U(7), + 6, 7, S(6), S(7), T(6), T(7), U(6), U(7), + 6, 7, S(6), S(7), T(6), T(7), U(6), U(7), + 6, 7, S(6), S(7), T(6), T(7), U(6), U(7), + 6, 7, S(6), S(7), T(6), T(7), U(6), U(7), + 6, 7, S(6), S(7), T(6), T(7), U(6), U(7), + 6, 7, S(6), S(7), T(6), T(7), U(6), U(7) }, + { 0, 0, 0, 0, 2, 2, 2, 2, S(0), S(0), S(0), S(0), S(2), S(2), S(2), S(2), + T(0), T(0), T(0), T(0), T(2), T(2), T(2), T(2), U(0), 
U(0), U(0), U(0), U(2), U(2), U(2), U(2), + 0, 0, 0, 0, 2, 2, 2, 2, S(0), S(0), S(0), S(0), S(2), S(2), S(2), S(2), + T(0), T(0), T(0), T(0), T(2), T(2), T(2), T(2), U(0), U(0), U(0), U(0), U(2), U(2), U(2), U(2), + 0, 0, 0, 0, 2, 2, 2, 2, S(0), S(0), S(0), S(0), S(2), S(2), S(2), S(2), + T(0), T(0), T(0), T(0), T(2), T(2), T(2), T(2), U(0), U(0), U(0), U(0), U(2), U(2), U(2), U(2), + 0, 0, 0, 0, 2, 2, 2, 2, S(0), S(0), S(0), S(0), S(2), S(2), S(2), S(2), + T(0), T(0), T(0), T(0), T(2), T(2), T(2), T(2), U(0), U(0), U(0), U(0), U(2), U(2), U(2), U(2), + 0, 0, 0, 0, 2, 2, 2, 2, S(0), S(0), S(0), S(0), S(2), S(2), S(2), S(2), + T(0), T(0), T(0), T(0), T(2), T(2), T(2), T(2), U(0), U(0), U(0), U(0), U(2), U(2), U(2), U(2), + 0, 0, 0, 0, 2, 2, 2, 2, S(0), S(0), S(0), S(0), S(2), S(2), S(2), S(2), + T(0), T(0), T(0), T(0), T(2), T(2), T(2), T(2), U(0), U(0), U(0), U(0), U(2), U(2), U(2), U(2), + 0, 0, 0, 0, 2, 2, 2, 2, S(0), S(0), S(0), S(0), S(2), S(2), S(2), S(2), + T(0), T(0), T(0), T(0), T(2), T(2), T(2), T(2), U(0), U(0), U(0), U(0), U(2), U(2), U(2), U(2), + 0, 0, 0, 0, 2, 2, 2, 2, S(0), S(0), S(0), S(0), S(2), S(2), S(2), S(2), + T(0), T(0), T(0), T(0), T(2), T(2), T(2), T(2), U(0), U(0), U(0), U(0), U(2), U(2), U(2), U(2), + 4, 4, 4, 4, S(4), S(4), S(4), S(4), + T(4), T(4), T(4), T(4), U(4), U(4), U(4), U(4), + 4, 4, 4, 4, S(4), S(4), S(4), S(4), + T(4), T(4), T(4), T(4), U(4), U(4), U(4), U(4), + 4, 4, 4, 4, S(4), S(4), S(4), S(4), + T(4), T(4), T(4), T(4), U(4), U(4), U(4), U(4), + 4, 4, 4, 4, S(4), S(4), S(4), S(4), + T(4), T(4), T(4), T(4), U(4), U(4), U(4), U(4), + 6, 6, 6, 6, S(6), S(6), S(6), S(6), + T(6), T(6), T(6), T(6), U(6), U(6), U(6), U(6), + 6, 6, 6, 6, S(6), S(6), S(6), S(6), + T(6), T(6), T(6), T(6), U(6), U(6), U(6), U(6), + 6, 6, 6, 6, S(6), S(6), S(6), S(6), + T(6), T(6), T(6), T(6), U(6), U(6), U(6), U(6), + 6, 6, 6, 6, S(6), S(6), S(6), S(6), + T(6), T(6), T(6), T(6), U(6), U(6), U(6), U(6) }, + { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, + S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), + T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), + U(0), U(0), U(0), U(0), U(0), U(0), U(0), U(0), U(0), U(0), U(0), U(0), U(0), U(0), U(0), U(0), + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), + T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), + U(0), U(0), U(0), U(0), U(0), U(0), U(0), U(0), U(0), U(0), U(0), U(0), U(0), U(0), U(0), U(0), + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), + T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), + U(0), U(0), U(0), U(0), U(0), U(0), U(0), U(0), U(0), U(0), U(0), U(0), U(0), U(0), U(0), U(0), + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), S(0), + T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0), + U(0), U(0), U(0), U(0), U(0), U(0), U(0), U(0), U(0), U(0), U(0), U(0), U(0), U(0), U(0), U(0), + 4, 4, 4, 4, 4, 4, 4, 4, + 4, 4, 4, 4, 4, 4, 4, 4, + T(4), T(4), T(4), T(4), T(4), T(4), T(4), T(4), + T(4), T(4), T(4), T(4), T(4), T(4), T(4), T(4), + 4, 4, 4, 4, 4, 4, 4, 4, + 4, 4, 4, 4, 4, 4, 4, 4, + T(4), T(4), T(4), T(4), T(4), T(4), T(4), T(4), + T(4), T(4), T(4), T(4), T(4), T(4), T(4), T(4), + 6, 6, 6, 6, 6, 6, 6, 6, + 6, 6, 6, 6, 6, 6, 6, 6, + T(6), T(6), T(6), T(6), T(6), T(6), T(6), T(6), + T(6), T(6), T(6), T(6), T(6), T(6), T(6), T(6), + 6, 6, 6, 6, 6, 6, 6, 6, + 6, 6, 6, 6, 6, 6, 6, 6, + T(6), T(6), T(6), T(6), T(6), T(6), T(6), T(6), + T(6), T(6), T(6), T(6), T(6), T(6), T(6), T(6) }, + { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 4, 4, 4, 4, 4, 4, 4, 4, + 4, 4, 4, 4, 4, 4, 4, 4, + 4, 4, 4, 4, 4, 4, 4, 4, + 4, 4, 4, 4, 4, 4, 4, 4, + 4, 4, 4, 4, 4, 4, 4, 4, + 4, 4, 4, 4, 4, 4, 4, 4, + 4, 4, 4, 4, 4, 4, 4, 4, + 4, 4, 4, 4, 4, 4, 4, 4, + 6, 6, 6, 6, 6, 6, 6, 6, + 6, 6, 6, 6, 6, 6, 6, 6, + 6, 6, 6, 6, 6, 6, 6, 6, + 6, 6, 6, 6, 6, 6, 6, 6, + 6, 6, 6, 6, 6, 6, 6, 6, + 6, 6, 6, 6, 6, 6, 6, 6, + 6, 6, 6, 6, 6, 6, 6, 6, + 6, 6, 6, 6, 6, 6, 6, 6 }, +}; +#undef U +#undef T +#undef S diff --git a/vp9/common/vp9_blockd.h b/vp9/common/vp9_blockd.h index b35c1c246..b46dd0568 100644 --- a/vp9/common/vp9_blockd.h +++ b/vp9/common/vp9_blockd.h @@ -277,13 +277,6 @@ typedef struct blockd { union b_mode_info bmi; } BLOCKD; -typedef struct superblockd { - /* 32x32 Y and 16x16 U/V */ - DECLARE_ALIGNED(16, int16_t, diff[32*32+16*16*2]); - DECLARE_ALIGNED(16, int16_t, qcoeff[32*32+16*16*2]); - DECLARE_ALIGNED(16, int16_t, dqcoeff[32*32+16*16*2]); -} SUPERBLOCKD; - struct scale_factors { int x_num; int x_den; @@ -297,13 +290,11 @@ struct scale_factors { }; typedef struct macroblockd { - DECLARE_ALIGNED(16, int16_t, diff[384]); /* from idct diff */ - DECLARE_ALIGNED(16, uint8_t, predictor[384]); - DECLARE_ALIGNED(16, int16_t, qcoeff[384]); - DECLARE_ALIGNED(16, int16_t, dqcoeff[384]); 
- DECLARE_ALIGNED(16, uint16_t, eobs[24]); - - SUPERBLOCKD sb_coeff_data; + DECLARE_ALIGNED(16, int16_t, diff[64*64+32*32*2]); /* from idct diff */ + DECLARE_ALIGNED(16, uint8_t, predictor[384]); // unused for superblocks + DECLARE_ALIGNED(16, int16_t, qcoeff[64*64+32*32*2]); + DECLARE_ALIGNED(16, int16_t, dqcoeff[64*64+32*32*2]); + DECLARE_ALIGNED(16, uint16_t, eobs[256+64*2]); /* 16 Y blocks, 4 U, 4 V, each with 16 entries. */ BLOCKD block[24]; @@ -451,8 +442,12 @@ static TX_TYPE txfm_map(B_PREDICTION_MODE bmode) { } } -extern const uint8_t vp9_block2left[TX_SIZE_MAX_SB][24]; -extern const uint8_t vp9_block2above[TX_SIZE_MAX_SB][24]; +extern const uint8_t vp9_block2left[TX_SIZE_MAX_MB][24]; +extern const uint8_t vp9_block2above[TX_SIZE_MAX_MB][24]; +extern const uint8_t vp9_block2left_sb[TX_SIZE_MAX_SB][96]; +extern const uint8_t vp9_block2above_sb[TX_SIZE_MAX_SB][96]; +extern const uint8_t vp9_block2left_sb64[TX_SIZE_MAX_SB][384]; +extern const uint8_t vp9_block2above_sb64[TX_SIZE_MAX_SB][384]; #define USE_ADST_FOR_I16X16_8X8 0 #define USE_ADST_FOR_I16X16_4X4 0 diff --git a/vp9/common/vp9_default_coef_probs.h b/vp9/common/vp9_default_coef_probs.h index 6309566a7..204e65af6 100644 --- a/vp9/common/vp9_default_coef_probs.h +++ b/vp9/common/vp9_default_coef_probs.h @@ -270,85 +270,85 @@ static const vp9_coeff_probs default_coef_probs_8x8[BLOCK_TYPES] = { }, { /* block Type 1 */ { /* Intra */ { /* Coeff Band 0 */ - { 202, 29, 181, 221, 168, 177, 217, 162, 235, 202, 157 }, - { 117, 39, 146, 207, 155, 172, 203, 155, 236, 192, 208 }, - { 46, 40, 99, 171, 136, 161, 176, 140, 229, 177, 208 } + { 210, 33, 210, 232, 185, 185, 210, 166, 207, 192, 146 }, + { 118, 47, 169, 220, 170, 179, 201, 160, 231, 183, 211 }, + { 40, 52, 119, 203, 146, 169, 207, 160, 242, 194, 222 } }, { /* Coeff Band 1 */ - { 1, 138, 204, 227, 179, 181, 224, 161, 249, 203, 237 }, - { 116, 138, 209, 227, 179, 180, 222, 165, 248, 204, 241 }, - { 63, 112, 184, 227, 183, 178, 223, 167, 248, 206, 237 }, - { 
47, 84, 140, 219, 163, 177, 223, 160, 249, 207, 241 }, - { 25, 53, 76, 179, 120, 156, 217, 152, 248, 205, 232 }, - { 10, 23, 29, 76, 91, 132, 145, 109, 228, 169, 214 } + { 1, 158, 215, 239, 192, 188, 234, 174, 253, 219, 230 }, + { 130, 149, 210, 238, 191, 188, 233, 174, 253, 221, 240 }, + { 59, 123, 193, 237, 188, 187, 232, 174, 252, 220, 246 }, + { 22, 89, 154, 232, 172, 183, 233, 173, 253, 219, 237 }, + { 4, 49, 83, 193, 128, 160, 227, 161, 253, 219, 233 }, + { 1, 18, 27, 87, 90, 133, 160, 112, 242, 185, 231 } }, { /* Coeff Band 2 */ - { 1, 69, 198, 223, 179, 177, 225, 154, 251, 208, 227 }, - { 78, 78, 170, 223, 170, 179, 218, 162, 248, 203, 245 }, - { 26, 69, 117, 209, 154, 170, 215, 160, 249, 205, 239 }, - { 16, 54, 79, 180, 119, 156, 208, 151, 248, 201, 238 }, - { 12, 43, 45, 119, 102, 142, 186, 126, 245, 193, 236 }, - { 1, 24, 22, 60, 92, 133, 114, 99, 221, 154, 210 } + { 1, 87, 205, 244, 192, 193, 239, 188, 252, 220, 217 }, + { 64, 93, 169, 237, 175, 186, 237, 184, 253, 222, 235 }, + { 19, 77, 130, 222, 154, 175, 231, 173, 253, 221, 223 }, + { 6, 59, 95, 196, 132, 162, 223, 160, 251, 215, 240 }, + { 1, 37, 57, 144, 109, 146, 201, 135, 250, 205, 238 }, + { 1, 17, 26, 81, 94, 138, 135, 107, 232, 168, 223 } }, { /* Coeff Band 3 */ - { 1, 135, 214, 222, 183, 178, 230, 144, 252, 208, 241 }, - { 107, 122, 201, 229, 181, 182, 221, 165, 250, 202, 243 }, - { 38, 100, 168, 221, 168, 176, 220, 166, 250, 208, 240 }, - { 21, 83, 125, 206, 149, 167, 217, 160, 250, 209, 238 }, - { 16, 65, 80, 164, 122, 156, 208, 139, 250, 206, 246 }, - { 3, 37, 43, 104, 103, 143, 156, 118, 237, 173, 227 } + { 1, 150, 219, 243, 198, 192, 237, 182, 253, 227, 245 }, + { 88, 130, 202, 239, 190, 188, 236, 180, 253, 224, 255 }, + { 25, 103, 172, 231, 175, 182, 234, 174, 253, 227, 248 }, + { 7, 78, 128, 215, 156, 172, 228, 166, 252, 222, 248 }, + { 1, 48, 76, 175, 121, 155, 212, 149, 251, 213, 237 }, + { 1, 22, 35, 101, 97, 141, 161, 120, 236, 181, 213 } }, { /* Coeff Band 4 */ - { 1, 169, 223, 
233, 193, 184, 234, 150, 254, 206, 243 }, - { 83, 140, 201, 233, 184, 185, 228, 168, 252, 203, 223 }, - { 19, 104, 158, 225, 168, 179, 228, 169, 253, 207, 248 }, - { 10, 76, 117, 209, 145, 168, 223, 166, 252, 210, 243 }, - { 8, 59, 79, 163, 119, 153, 213, 142, 250, 205, 230 }, - { 1, 31, 43, 100, 103, 144, 149, 116, 240, 171, 221 } + { 1, 177, 228, 247, 206, 197, 243, 191, 255, 232, 255 }, + { 76, 143, 205, 243, 192, 192, 241, 189, 253, 223, 255 }, + { 17, 107, 163, 233, 170, 183, 239, 183, 253, 227, 218 }, + { 3, 75, 118, 216, 147, 171, 234, 174, 253, 220, 249 }, + { 1, 43, 71, 174, 118, 154, 217, 153, 250, 211, 240 }, + { 1, 19, 31, 93, 93, 136, 154, 116, 235, 178, 228 } }, { /* Coeff Band 5 */ - { 1, 190, 234, 247, 211, 197, 239, 172, 255, 208, 236 }, - { 65, 152, 218, 244, 199, 194, 236, 184, 252, 199, 249 }, - { 17, 109, 173, 237, 179, 186, 235, 183, 250, 205, 255 }, - { 6, 78, 127, 219, 153, 173, 231, 177, 251, 210, 249 }, - { 3, 56, 77, 172, 121, 157, 215, 152, 249, 209, 247 }, - { 1, 29, 38, 96, 97, 144, 152, 114, 239, 169, 243 } + { 1, 192, 230, 251, 215, 205, 245, 201, 254, 229, 255 }, + { 66, 142, 206, 248, 200, 202, 244, 197, 255, 224, 255 }, + { 21, 107, 166, 241, 176, 191, 241, 192, 253, 230, 255 }, + { 5, 79, 129, 221, 150, 173, 237, 178, 254, 226, 255 }, + { 1, 43, 72, 173, 117, 151, 217, 150, 253, 216, 245 }, + { 1, 17, 28, 93, 95, 139, 162, 114, 245, 187, 235 } } }, { /* Inter */ { /* Coeff Band 0 */ - { 223, 71, 225, 221, 176, 169, 242, 165, 248, 216, 201 }, - { 147, 79, 197, 215, 175, 172, 230, 154, 243, 203, 184 }, - { 69, 75, 152, 197, 158, 168, 203, 144, 231, 187, 177 } + { 235, 68, 250, 244, 206, 192, 229, 177, 248, 215, 237 }, + { 169, 88, 225, 235, 191, 184, 222, 170, 246, 205, 237 }, + { 65, 100, 171, 214, 166, 173, 216, 157, 249, 213, 215 } }, { /* Coeff Band 1 */ - { 1, 168, 219, 195, 168, 151, 249, 131, 255, 221, 255 }, - { 152, 156, 226, 210, 189, 173, 240, 121, 255, 215, 238 }, - { 82, 128, 198, 239, 201, 194, 220, 151, 254, 202, 251 
}, - { 74, 107, 150, 236, 163, 187, 222, 177, 255, 204, 255 }, - { 59, 103, 120, 181, 125, 148, 232, 157, 255, 219, 245 }, - { 21, 63, 84, 129, 122, 150, 171, 118, 246, 196, 226 } + { 1, 191, 246, 250, 217, 202, 244, 195, 255, 226, 128 }, + { 177, 169, 236, 250, 216, 201, 244, 194, 251, 228, 255 }, + { 70, 132, 205, 250, 209, 205, 246, 193, 254, 246, 255 }, + { 41, 108, 165, 244, 172, 194, 246, 202, 255, 229, 255 }, + { 23, 84, 126, 207, 140, 162, 244, 179, 254, 237, 255 }, + { 11, 57, 83, 149, 127, 156, 180, 126, 247, 202, 220 } }, { /* Coeff Band 2 */ - { 1, 133, 219, 202, 174, 158, 244, 133, 255, 214, 237 }, - { 101, 132, 204, 221, 187, 183, 225, 131, 253, 201, 247 }, - { 41, 107, 147, 228, 174, 187, 211, 162, 252, 201, 246 }, - { 40, 107, 107, 205, 129, 162, 213, 164, 252, 206, 232 }, - { 24, 140, 90, 122, 111, 141, 210, 127, 251, 208, 239 }, - { 1, 59, 55, 91, 111, 141, 144, 109, 241, 180, 226 } + { 1, 169, 240, 250, 212, 202, 242, 192, 252, 222, 255 }, + { 105, 151, 215, 246, 200, 197, 240, 190, 253, 221, 255 }, + { 24, 111, 166, 237, 177, 188, 236, 183, 252, 213, 255 }, + { 9, 83, 122, 218, 148, 170, 233, 174, 250, 215, 242 }, + { 1, 55, 77, 168, 118, 152, 215, 150, 248, 213, 226 }, + { 1, 26, 36, 104, 98, 146, 149, 116, 235, 182, 225 } }, { /* Coeff Band 3 */ - { 1, 170, 226, 200, 179, 153, 245, 138, 255, 214, 241 }, - { 111, 149, 217, 226, 194, 186, 223, 137, 255, 211, 253 }, - { 40, 113, 174, 228, 180, 183, 211, 165, 255, 212, 247 }, - { 44, 101, 126, 210, 151, 167, 212, 161, 255, 217, 241 }, - { 43, 131, 103, 146, 119, 148, 211, 136, 254, 216, 250 }, - { 1, 57, 63, 112, 116, 145, 158, 115, 249, 193, 236 } + { 1, 191, 243, 251, 219, 204, 246, 196, 255, 230, 128 }, + { 97, 168, 225, 248, 207, 198, 244, 193, 254, 225, 192 }, + { 15, 122, 182, 241, 187, 188, 241, 190, 251, 231, 228 }, + { 3, 83, 131, 226, 160, 178, 237, 180, 251, 222, 205 }, + { 1, 49, 77, 184, 121, 155, 222, 159, 249, 216, 249 }, + { 1, 21, 32, 98, 98, 140, 152, 113, 233, 173, 243 } }, { /* 
Coeff Band 4 */ - { 1, 186, 233, 216, 191, 163, 241, 143, 255, 210, 255 }, - { 91, 161, 214, 225, 190, 181, 224, 150, 255, 212, 253 }, - { 26, 117, 163, 220, 172, 180, 218, 148, 255, 215, 252 }, - { 27, 90, 122, 203, 143, 167, 212, 159, 255, 213, 255 }, - { 21, 98, 113, 163, 130, 153, 208, 141, 255, 215, 248 }, - { 1, 47, 66, 130, 118, 151, 167, 123, 252, 199, 235 } + { 1, 202, 242, 253, 226, 212, 245, 205, 254, 226, 255 }, + { 83, 168, 219, 252, 212, 211, 244, 200, 250, 215, 255 }, + { 9, 143, 174, 245, 183, 197, 241, 194, 254, 217, 255 }, + { 1, 105, 129, 228, 154, 179, 233, 179, 253, 211, 255 }, + { 1, 47, 72, 177, 116, 152, 214, 157, 251, 209, 255 }, + { 1, 18, 26, 79, 94, 137, 150, 109, 246, 175, 248 } }, { /* Coeff Band 5 */ - { 1, 195, 236, 245, 211, 195, 238, 171, 255, 209, 248 }, - { 65, 156, 218, 245, 200, 196, 230, 185, 255, 212, 248 }, - { 13, 112, 172, 238, 180, 189, 231, 185, 255, 213, 250 }, - { 6, 83, 130, 224, 155, 177, 227, 180, 255, 214, 244 }, - { 5, 71, 91, 185, 133, 160, 214, 154, 254, 212, 248 }, - { 1, 45, 63, 128, 112, 147, 169, 129, 248, 190, 236 } + { 1, 205, 236, 254, 233, 221, 247, 201, 255, 220, 128 }, + { 87, 149, 205, 254, 211, 219, 245, 207, 255, 239, 128 }, + { 56, 122, 162, 248, 164, 195, 246, 211, 255, 231, 128 }, + { 26, 108, 163, 224, 149, 169, 240, 187, 255, 238, 255 }, + { 1, 54, 89, 171, 123, 152, 219, 148, 254, 226, 255 }, + { 1, 21, 34, 99, 90, 140, 174, 112, 252, 210, 255 } } } } @@ -441,90 +441,90 @@ static const vp9_coeff_probs default_coef_probs_16x16[BLOCK_TYPES] = { }, { /* block Type 1 */ { /* Intra */ { /* Coeff Band 0 */ - { 198, 28, 192, 217, 170, 174, 201, 162, 219, 179, 159 }, - { 96, 36, 145, 198, 153, 167, 193, 153, 222, 180, 177 }, - { 31, 35, 89, 156, 131, 157, 166, 136, 214, 170, 178 } + { 203, 35, 218, 235, 189, 187, 194, 174, 175, 150, 127 }, + { 95, 50, 155, 211, 161, 173, 190, 163, 198, 161, 187 }, + { 21, 46, 93, 178, 130, 157, 200, 151, 224, 186, 191 } }, { /* Coeff Band 1 */ - { 1, 138, 202, 225, 
174, 178, 218, 164, 243, 200, 201 }, - { 147, 134, 202, 223, 174, 177, 215, 162, 243, 204, 220 }, - { 65, 115, 179, 224, 176, 177, 215, 162, 243, 202, 227 }, - { 25, 86, 141, 217, 163, 177, 216, 159, 243, 201, 225 }, - { 6, 48, 79, 181, 125, 157, 209, 151, 244, 201, 212 }, - { 1, 16, 25, 77, 91, 134, 132, 112, 210, 162, 180 } + { 1, 155, 198, 236, 183, 187, 223, 175, 250, 209, 255 }, + { 115, 147, 192, 235, 182, 186, 222, 173, 244, 199, 222 }, + { 43, 124, 174, 234, 178, 186, 222, 176, 249, 201, 255 }, + { 13, 96, 143, 227, 164, 181, 223, 174, 248, 197, 237 }, + { 2, 59, 91, 197, 131, 163, 213, 162, 246, 198, 241 }, + { 1, 19, 29, 85, 96, 139, 128, 116, 215, 153, 204 } }, { /* Coeff Band 2 */ - { 1, 78, 195, 222, 172, 177, 219, 162, 245, 205, 227 }, - { 67, 79, 154, 211, 158, 171, 212, 159, 243, 201, 222 }, - { 18, 63, 108, 192, 140, 163, 205, 152, 242, 197, 214 }, - { 6, 49, 77, 163, 121, 154, 192, 142, 239, 191, 216 }, - { 1, 34, 49, 112, 106, 143, 160, 122, 233, 178, 213 }, - { 1, 14, 20, 56, 93, 135, 94, 102, 189, 141, 170 } + { 1, 91, 180, 231, 170, 180, 237, 181, 248, 213, 230 }, + { 39, 83, 139, 220, 153, 173, 233, 179, 243, 200, 228 }, + { 12, 63, 106, 203, 136, 163, 227, 170, 244, 200, 234 }, + { 5, 48, 79, 178, 123, 154, 215, 155, 244, 197, 232 }, + { 1, 32, 50, 125, 104, 144, 171, 130, 238, 181, 229 }, + { 1, 12, 18, 54, 88, 131, 92, 99, 201, 142, 193 } }, { /* Coeff Band 3 */ - { 1, 137, 210, 229, 182, 181, 223, 164, 247, 214, 201 }, - { 89, 123, 189, 226, 176, 180, 217, 165, 245, 207, 216 }, - { 24, 100, 155, 217, 162, 176, 215, 163, 242, 198, 215 }, - { 8, 78, 121, 199, 147, 167, 206, 155, 241, 198, 212 }, - { 2, 52, 81, 161, 125, 156, 185, 139, 236, 186, 207 }, - { 1, 22, 35, 88, 102, 141, 121, 116, 199, 153, 179 } + { 1, 152, 202, 238, 186, 188, 227, 178, 248, 205, 229 }, + { 63, 125, 183, 234, 178, 184, 225, 179, 248, 205, 228 }, + { 15, 100, 153, 227, 166, 180, 223, 173, 244, 198, 229 }, + { 4, 76, 119, 210, 149, 170, 215, 165, 245, 200, 221 }, + 
{ 1, 46, 73, 165, 120, 154, 192, 144, 241, 189, 225 }, + { 1, 18, 27, 78, 95, 136, 124, 110, 219, 158, 207 } }, { /* Coeff Band 4 */ - { 1, 169, 220, 239, 196, 191, 220, 173, 242, 201, 226 }, - { 64, 139, 195, 231, 183, 184, 215, 169, 240, 196, 211 }, - { 12, 103, 153, 217, 162, 174, 212, 163, 236, 195, 211 }, - { 3, 71, 109, 190, 141, 164, 202, 152, 240, 192, 220 }, - { 1, 38, 61, 139, 114, 149, 175, 133, 233, 183, 211 }, - { 1, 13, 22, 61, 93, 134, 101, 106, 194, 145, 185 } + { 1, 181, 211, 243, 197, 195, 228, 180, 249, 211, 252 }, + { 40, 138, 189, 237, 184, 189, 226, 178, 249, 208, 247 }, + { 7, 103, 153, 226, 166, 179, 223, 171, 249, 209, 224 }, + { 1, 71, 110, 200, 143, 166, 213, 159, 249, 206, 241 }, + { 1, 37, 60, 144, 111, 150, 189, 135, 245, 196, 232 }, + { 1, 15, 25, 75, 91, 134, 128, 108, 224, 163, 213 } }, { /* Coeff Band 5 */ - { 1, 204, 220, 234, 193, 185, 220, 166, 247, 207, 237 }, - { 42, 139, 187, 221, 174, 177, 215, 161, 246, 201, 242 }, - { 5, 83, 132, 204, 152, 168, 212, 158, 246, 203, 225 }, - { 1, 48, 84, 175, 126, 157, 203, 148, 245, 199, 233 }, - { 1, 24, 46, 123, 103, 142, 178, 128, 243, 189, 235 }, - { 1, 10, 19, 58, 88, 134, 109, 101, 216, 151, 216 } + { 1, 215, 219, 246, 205, 197, 236, 183, 252, 221, 235 }, + { 32, 146, 197, 239, 187, 188, 234, 180, 252, 223, 247 }, + { 6, 100, 150, 227, 167, 178, 233, 178, 252, 219, 233 }, + { 1, 63, 102, 203, 138, 167, 225, 162, 252, 216, 240 }, + { 1, 33, 56, 148, 109, 146, 202, 138, 250, 208, 237 }, + { 1, 15, 25, 75, 90, 131, 138, 108, 236, 171, 235 } } }, { /* Inter */ { /* Coeff Band 0 */ - { 227, 36, 243, 237, 206, 186, 210, 157, 245, 195, 200 }, - { 144, 41, 214, 226, 190, 182, 207, 155, 238, 193, 177 }, - { 63, 37, 153, 199, 162, 169, 193, 145, 227, 187, 152 } + { 228, 37, 245, 229, 199, 183, 200, 146, 240, 188, 223 }, + { 138, 62, 209, 217, 184, 177, 195, 148, 246, 186, 236 }, + { 42, 79, 146, 185, 156, 167, 183, 137, 247, 189, 251 } }, { /* Coeff Band 1 */ - { 1, 170, 247, 248, 213, 201, 
239, 188, 238, 203, 255 }, - { 214, 166, 242, 248, 212, 198, 236, 191, 221, 219, 199 }, - { 139, 148, 224, 247, 207, 197, 236, 189, 249, 241, 128 }, - { 102, 127, 195, 244, 190, 198, 235, 189, 239, 202, 228 }, - { 76, 106, 154, 227, 159, 176, 234, 182, 243, 216, 229 }, - { 52, 69, 93, 158, 125, 155, 173, 139, 225, 170, 209 } + { 1, 205, 242, 248, 210, 202, 245, 193, 233, 230, 255 }, + { 191, 185, 234, 249, 210, 201, 245, 194, 255, 197, 128 }, + { 112, 148, 214, 247, 208, 201, 246, 192, 255, 238, 128 }, + { 76, 120, 182, 246, 190, 198, 246, 202, 255, 244, 128 }, + { 51, 95, 145, 232, 156, 177, 246, 199, 255, 233, 128 }, + { 47, 71, 104, 195, 129, 158, 230, 167, 253, 224, 255 } }, { /* Coeff Band 2 */ - { 1, 139, 241, 245, 205, 193, 230, 177, 239, 198, 183 }, - { 131, 139, 214, 240, 191, 189, 224, 181, 236, 203, 194 }, - { 32, 102, 157, 228, 167, 177, 221, 174, 235, 191, 194 }, - { 12, 75, 112, 201, 142, 163, 208, 161, 227, 180, 200 }, - { 2, 45, 66, 142, 119, 154, 178, 141, 220, 171, 213 }, - { 1, 15, 20, 56, 102, 151, 87, 104, 182, 136, 175 } + { 1, 182, 235, 247, 204, 195, 246, 202, 255, 227, 128 }, + { 104, 145, 204, 243, 189, 191, 242, 199, 255, 229, 128 }, + { 35, 107, 159, 234, 167, 181, 244, 188, 255, 221, 128 }, + { 17, 87, 126, 216, 151, 168, 242, 179, 255, 242, 128 }, + { 4, 68, 91, 182, 131, 154, 222, 153, 255, 228, 128 }, + { 1, 55, 64, 126, 105, 137, 193, 121, 247, 194, 255 } }, { /* Coeff Band 3 */ - { 1, 174, 243, 248, 212, 201, 237, 194, 249, 207, 255 }, - { 134, 155, 223, 244, 200, 195, 230, 184, 248, 189, 233 }, - { 26, 115, 177, 235, 180, 185, 225, 176, 245, 198, 255 }, - { 8, 82, 129, 217, 156, 175, 220, 168, 243, 204, 228 }, - { 3, 48, 75, 165, 122, 155, 193, 145, 245, 189, 199 }, - { 1, 15, 27, 73, 101, 139, 117, 112, 212, 157, 209 } + { 1, 210, 239, 249, 209, 201, 249, 205, 255, 255, 128 }, + { 91, 162, 218, 247, 200, 195, 250, 199, 255, 255, 128 }, + { 16, 116, 173, 242, 184, 190, 251, 193, 255, 205, 128 }, + { 5, 85, 133, 228, 156, 178, 244, 
184, 255, 251, 128 }, + { 1, 55, 83, 196, 125, 164, 236, 168, 249, 249, 255 }, + { 1, 24, 39, 127, 92, 154, 183, 133, 255, 192, 128 } }, { /* Coeff Band 4 */ - { 1, 191, 244, 248, 214, 200, 229, 185, 249, 207, 255 }, - { 106, 167, 221, 242, 198, 192, 223, 178, 245, 202, 246 }, - { 13, 117, 169, 229, 175, 182, 220, 170, 244, 202, 226 }, - { 2, 74, 114, 203, 143, 170, 211, 160, 248, 199, 232 }, - { 1, 35, 58, 141, 111, 144, 184, 132, 244, 196, 239 }, - { 1, 12, 22, 66, 91, 138, 114, 102, 225, 156, 214 } + { 1, 225, 242, 252, 218, 205, 251, 207, 255, 255, 128 }, + { 67, 174, 223, 249, 205, 199, 250, 210, 255, 234, 128 }, + { 10, 119, 177, 243, 186, 187, 253, 199, 255, 255, 128 }, + { 2, 81, 129, 228, 154, 177, 244, 193, 255, 251, 128 }, + { 1, 48, 78, 193, 122, 152, 240, 171, 255, 240, 128 }, + { 1, 19, 43, 116, 96, 128, 195, 135, 255, 234, 128 } }, { /* Coeff Band 5 */ - { 1, 220, 231, 246, 203, 196, 239, 188, 255, 212, 255 }, - { 42, 155, 203, 241, 189, 191, 235, 184, 253, 220, 255 }, - { 4, 95, 151, 230, 167, 182, 234, 178, 252, 217, 243 }, - { 1, 61, 105, 206, 140, 168, 226, 167, 250, 215, 242 }, - { 1, 31, 60, 151, 109, 148, 204, 142, 250, 208, 230 }, - { 1, 13, 26, 76, 93, 132, 139, 106, 236, 171, 237 } + { 1, 237, 210, 255, 213, 219, 255, 235, 255, 219, 128 }, + { 49, 163, 203, 252, 182, 198, 255, 235, 255, 255, 128 }, + { 23, 114, 156, 247, 196, 187, 255, 238, 255, 255, 128 }, + { 6, 71, 124, 248, 163, 202, 253, 203, 255, 255, 128 }, + { 1, 35, 74, 226, 160, 162, 246, 189, 255, 244, 128 }, + { 1, 16, 19, 136, 92, 164, 237, 108, 255, 255, 128 } } } } }; -static const vp9_coeff_probs default_coef_probs_32x32[BLOCK_TYPES_32X32] = { +static const vp9_coeff_probs default_coef_probs_32x32[BLOCK_TYPES] = { { /* block Type 0 */ { /* Intra */ { /* Coeff Band 0 */ @@ -609,5 +609,89 @@ static const vp9_coeff_probs default_coef_probs_32x32[BLOCK_TYPES_32X32] = { { 1, 9, 16, 48, 89, 134, 89, 99, 183, 140, 169 } } } + }, { /* block Type 1 */ + { /* Intra */ + { /* Coeff 
Band 0 */ + { 176, 22, 201, 227, 185, 189, 160, 172, 115, 141, 105 }, + { 64, 33, 120, 195, 149, 171, 170, 150, 182, 175, 139 }, + { 12, 33, 68, 151, 118, 153, 172, 138, 202, 175, 153 } + }, { /* Coeff Band 1 */ + { 1, 125, 175, 228, 163, 176, 215, 171, 226, 193, 165 }, + { 127, 126, 174, 224, 163, 177, 212, 167, 225, 175, 235 }, + { 57, 114, 159, 223, 166, 175, 216, 167, 234, 182, 211 }, + { 23, 93, 133, 215, 150, 174, 216, 171, 233, 174, 176 }, + { 4, 56, 84, 178, 127, 157, 209, 149, 233, 197, 194 }, + { 1, 19, 26, 70, 93, 136, 114, 108, 193, 150, 167 } + }, { /* Coeff Band 2 */ + { 1, 76, 172, 217, 161, 172, 216, 165, 240, 188, 226 }, + { 41, 73, 136, 208, 152, 168, 214, 163, 233, 189, 248 }, + { 14, 59, 102, 195, 137, 163, 209, 158, 227, 184, 204 }, + { 4, 45, 75, 168, 122, 153, 197, 148, 231, 193, 178 }, + { 1, 33, 48, 118, 106, 148, 154, 126, 221, 168, 211 }, + { 1, 12, 16, 42, 90, 143, 61, 94, 159, 122, 167 } + }, { /* Coeff Band 3 */ + { 1, 134, 186, 226, 173, 180, 208, 172, 220, 179, 205 }, + { 60, 114, 164, 219, 166, 177, 207, 166, 231, 176, 208 }, + { 18, 90, 134, 208, 152, 175, 200, 164, 225, 181, 199 }, + { 7, 67, 102, 189, 139, 164, 192, 155, 225, 172, 209 }, + { 1, 39, 59, 137, 116, 151, 160, 132, 222, 166, 212 }, + { 1, 12, 17, 50, 93, 134, 82, 102, 181, 131, 190 } + }, { /* Coeff Band 4 */ + { 1, 160, 195, 229, 180, 185, 204, 163, 243, 185, 223 }, + { 31, 124, 170, 221, 170, 179, 201, 164, 240, 183, 223 }, + { 5, 91, 134, 204, 154, 170, 191, 155, 236, 178, 232 }, + { 1, 62, 95, 173, 135, 159, 180, 145, 234, 179, 225 }, + { 1, 30, 48, 116, 109, 147, 152, 123, 231, 170, 224 }, + { 1, 11, 17, 53, 90, 133, 93, 102, 201, 139, 202 } + }, { /* Coeff Band 5 */ + { 1, 215, 203, 233, 186, 183, 226, 170, 249, 213, 225 }, + { 13, 133, 175, 224, 170, 178, 224, 167, 250, 212, 235 }, + { 1, 83, 127, 209, 151, 169, 221, 162, 251, 212, 243 }, + { 1, 53, 85, 182, 127, 157, 213, 153, 250, 210, 234 }, + { 1, 30, 47, 131, 103, 143, 190, 132, 248, 200, 240 }, + { 1, 14, 
21, 67, 89, 129, 126, 104, 232, 167, 223 } + } + }, { /* Inter */ + { /* Coeff Band 0 */ + { 193, 35, 239, 239, 206, 194, 195, 152, 244, 200, 255 }, + { 77, 57, 198, 224, 192, 187, 181, 145, 242, 190, 248 }, + { 21, 54, 149, 197, 172, 171, 169, 138, 238, 178, 241 } + }, { /* Coeff Band 1 */ + { 1, 227, 241, 247, 195, 195, 245, 199, 255, 255, 128 }, + { 189, 223, 237, 249, 199, 200, 238, 198, 255, 255, 128 }, + { 125, 204, 226, 247, 198, 199, 251, 213, 255, 255, 128 }, + { 101, 167, 207, 246, 193, 201, 245, 168, 255, 255, 128 }, + { 89, 121, 174, 237, 169, 184, 246, 204, 255, 255, 128 }, + { 71, 79, 135, 216, 149, 170, 234, 168, 255, 226, 128 } + }, { /* Coeff Band 2 */ + { 1, 207, 235, 250, 220, 204, 250, 201, 255, 255, 128 }, + { 103, 160, 210, 245, 195, 188, 249, 195, 255, 255, 128 }, + { 33, 130, 165, 234, 168, 183, 253, 199, 255, 255, 128 }, + { 10, 113, 138, 223, 146, 180, 248, 199, 255, 255, 128 }, + { 1, 88, 104, 172, 112, 174, 221, 126, 255, 217, 128 }, + { 1, 87, 70, 160, 68, 140, 171, 85, 255, 85, 128 } + }, { /* Coeff Band 3 */ + { 1, 230, 240, 249, 209, 200, 243, 199, 255, 228, 128 }, + { 60, 178, 218, 247, 203, 200, 247, 198, 255, 255, 128 }, + { 8, 119, 162, 241, 188, 185, 252, 202, 255, 255, 128 }, + { 2, 78, 119, 218, 149, 162, 247, 184, 255, 255, 128 }, + { 1, 48, 81, 172, 142, 148, 239, 140, 255, 239, 128 }, + { 1, 29, 23, 82, 96, 102, 181, 149, 255, 255, 128 } + }, { /* Coeff Band 4 */ + { 1, 240, 241, 250, 216, 203, 248, 188, 255, 255, 128 }, + { 60, 180, 222, 247, 202, 195, 247, 191, 255, 255, 128 }, + { 9, 120, 169, 240, 190, 189, 249, 181, 255, 255, 128 }, + { 2, 85, 126, 223, 154, 178, 240, 184, 255, 255, 128 }, + { 1, 47, 90, 198, 132, 158, 233, 162, 255, 224, 128 }, + { 1, 33, 34, 143, 116, 156, 217, 128, 255, 255, 128 } + }, { /* Coeff Band 5 */ + { 1, 250, 193, 249, 188, 193, 255, 236, 255, 255, 128 }, + { 35, 187, 185, 247, 154, 184, 255, 247, 255, 171, 128 }, + { 20, 132, 114, 223, 172, 165, 255, 229, 255, 255, 128 }, + { 4, 97, 96, 
218, 96, 162, 255, 164, 255, 253, 128 }, + { 1, 57, 35, 197, 154, 173, 254, 215, 255, 255, 128 }, + { 1, 8, 2, 161, 10, 57, 230, 228, 255, 171, 128 } + } + } } }; diff --git a/vp9/common/vp9_entropy.c b/vp9/common/vp9_entropy.c index 759b90128..1e3a7e17e 100644 --- a/vp9/common/vp9_entropy.c +++ b/vp9/common/vp9_entropy.c @@ -336,6 +336,6 @@ void vp9_adapt_coef_probs(VP9_COMMON *cm) { BLOCK_TYPES, cm->fc.coef_counts_16x16, count_sat, update_factor); update_coef_probs(cm->fc.coef_probs_32x32, cm->fc.pre_coef_probs_32x32, - BLOCK_TYPES_32X32, cm->fc.coef_counts_32x32, + BLOCK_TYPES, cm->fc.coef_counts_32x32, count_sat, update_factor); } diff --git a/vp9/common/vp9_entropy.h b/vp9/common/vp9_entropy.h index 20559a79b..8d28b0058 100644 --- a/vp9/common/vp9_entropy.h +++ b/vp9/common/vp9_entropy.h @@ -61,7 +61,6 @@ extern vp9_extra_bit_struct vp9_extra_bits[12]; /* indexed by token value */ /* Outside dimension. 0 = Y with DC, 1 = UV */ #define BLOCK_TYPES 2 -#define BLOCK_TYPES_32X32 1 #define REF_TYPES 2 // intra=0, inter=1 /* Middle dimension reflects the coefficient position within the transform. 
*/ @@ -110,12 +109,24 @@ extern DECLARE_ALIGNED(16, const int, vp9_default_zig_zag1d_32x32[1024]); void vp9_coef_tree_initialize(void); void vp9_adapt_coef_probs(struct VP9Common *); -static void vp9_reset_mb_tokens_context(MACROBLOCKD* const xd) { +static INLINE void vp9_reset_mb_tokens_context(MACROBLOCKD* const xd) { /* Clear entropy contexts */ vpx_memset(xd->above_context, 0, sizeof(ENTROPY_CONTEXT_PLANES)); vpx_memset(xd->left_context, 0, sizeof(ENTROPY_CONTEXT_PLANES)); } +static INLINE void vp9_reset_sb_tokens_context(MACROBLOCKD* const xd) { + /* Clear entropy contexts */ + vpx_memset(xd->above_context, 0, sizeof(ENTROPY_CONTEXT_PLANES) * 2); + vpx_memset(xd->left_context, 0, sizeof(ENTROPY_CONTEXT_PLANES) * 2); +} + +static INLINE void vp9_reset_sb64_tokens_context(MACROBLOCKD* const xd) { + /* Clear entropy contexts */ + vpx_memset(xd->above_context, 0, sizeof(ENTROPY_CONTEXT_PLANES) * 4); + vpx_memset(xd->left_context, 0, sizeof(ENTROPY_CONTEXT_PLANES) * 4); +} + extern const int vp9_coef_bands[32]; extern const int vp9_coef_bands4x4[16]; diff --git a/vp9/common/vp9_idctllm.c b/vp9/common/vp9_idctllm.c index 673abd7b1..54b79ee64 100644 --- a/vp9/common/vp9_idctllm.c +++ b/vp9/common/vp9_idctllm.c @@ -645,7 +645,7 @@ void vp9_short_idct16x16_c(int16_t *input, int16_t *output, int pitch) { // First transform rows for (i = 0; i < 16; ++i) { idct16_1d(input, outptr); - input += half_pitch; + input += 16; outptr += 16; } @@ -655,7 +655,7 @@ void vp9_short_idct16x16_c(int16_t *input, int16_t *output, int pitch) { temp_in[j] = out[j * 16 + i]; idct16_1d(temp_in, temp_out); for (j = 0; j < 16; ++j) - output[j * 16 + i] = ROUND_POWER_OF_TWO(temp_out[j], 6); + output[j * half_pitch + i] = ROUND_POWER_OF_TWO(temp_out[j], 6); } } @@ -838,7 +838,7 @@ static const transform_2d IHT_16[] = { }; void vp9_short_iht16x16_c(int16_t *input, int16_t *output, - int input_pitch, TX_TYPE tx_type) { + int pitch, TX_TYPE tx_type) { int i, j; int16_t out[16 * 16]; int16_t *outptr 
= out; @@ -848,7 +848,7 @@ void vp9_short_iht16x16_c(int16_t *input, int16_t *output, // Rows for (i = 0; i < 16; ++i) { ht.rows(input, outptr); - input += input_pitch; + input += 16; outptr += 16; } @@ -858,7 +858,7 @@ void vp9_short_iht16x16_c(int16_t *input, int16_t *output, temp_in[j] = out[j * 16 + i]; ht.cols(temp_in, temp_out); for (j = 0; j < 16; ++j) - output[j * 16 + i] = ROUND_POWER_OF_TWO(temp_out[j], 6); + output[j * pitch + i] = ROUND_POWER_OF_TWO(temp_out[j], 6); } } @@ -875,7 +875,7 @@ void vp9_short_idct10_16x16_c(int16_t *input, int16_t *output, int pitch) { vpx_memset(out, 0, sizeof(out)); for (i = 0; i < 4; ++i) { idct16_1d(input, outptr); - input += half_pitch; + input += 16; outptr += 16; } @@ -885,7 +885,7 @@ void vp9_short_idct10_16x16_c(int16_t *input, int16_t *output, int pitch) { temp_in[j] = out[j*16 + i]; idct16_1d(temp_in, temp_out); for (j = 0; j < 16; ++j) - output[j*16 + i] = ROUND_POWER_OF_TWO(temp_out[j], 6); + output[j * half_pitch + i] = ROUND_POWER_OF_TWO(temp_out[j], 6); } } @@ -1273,7 +1273,7 @@ void vp9_short_idct32x32_c(int16_t *input, int16_t *output, int pitch) { // Rows for (i = 0; i < 32; ++i) { idct32_1d(input, outptr); - input += half_pitch; + input += 32; outptr += 32; } @@ -1283,7 +1283,7 @@ void vp9_short_idct32x32_c(int16_t *input, int16_t *output, int pitch) { temp_in[j] = out[j * 32 + i]; idct32_1d(temp_in, temp_out); for (j = 0; j < 32; ++j) - output[j * 32 + i] = ROUND_POWER_OF_TWO(temp_out[j], 6); + output[j * half_pitch + i] = ROUND_POWER_OF_TWO(temp_out[j], 6); } } @@ -1306,7 +1306,7 @@ void vp9_short_idct10_32x32_c(int16_t *input, int16_t *output, int pitch) { vpx_memset(out, 0, sizeof(out)); for (i = 0; i < 4; ++i) { idct32_1d(input, outptr); - input += half_pitch; + input += 32; outptr += 32; } @@ -1316,6 +1316,6 @@ void vp9_short_idct10_32x32_c(int16_t *input, int16_t *output, int pitch) { temp_in[j] = out[j * 32 + i]; idct32_1d(temp_in, temp_out); for (j = 0; j < 32; ++j) - output[j * 32 + i] = 
ROUND_POWER_OF_TWO(temp_out[j], 6); + output[j * half_pitch + i] = ROUND_POWER_OF_TWO(temp_out[j], 6); } } diff --git a/vp9/common/vp9_invtrans.c b/vp9/common/vp9_invtrans.c index 1311b9111..a26415fc3 100644 --- a/vp9/common/vp9_invtrans.c +++ b/vp9/common/vp9_invtrans.c @@ -11,12 +11,13 @@ #include "vp9/common/vp9_invtrans.h" #include "./vp9_rtcd.h" -void vp9_inverse_transform_b_4x4(MACROBLOCKD *xd, int block, int pitch) { - BLOCKD *b = &xd->block[block]; - if (xd->eobs[block] <= 1) - xd->inv_txm4x4_1(b->dqcoeff, b->diff, pitch); +void vp9_inverse_transform_b_4x4(MACROBLOCKD *xd, int eob, + int16_t *dqcoeff, int16_t *diff, + int pitch) { + if (eob <= 1) + xd->inv_txm4x4_1(dqcoeff, diff, pitch); else - xd->inv_txm4x4(b->dqcoeff, b->diff, pitch); + xd->inv_txm4x4(dqcoeff, diff, pitch); } void vp9_inverse_transform_mby_4x4(MACROBLOCKD *xd) { @@ -27,7 +28,8 @@ void vp9_inverse_transform_mby_4x4(MACROBLOCKD *xd) { if (tx_type != DCT_DCT) { vp9_short_iht4x4(xd->block[i].dqcoeff, xd->block[i].diff, 16, tx_type); } else { - vp9_inverse_transform_b_4x4(xd, i, 32); + vp9_inverse_transform_b_4x4(xd, xd->eobs[i], xd->block[i].dqcoeff, + xd->block[i].diff, 32); } } } @@ -36,7 +38,8 @@ void vp9_inverse_transform_mbuv_4x4(MACROBLOCKD *xd) { int i; for (i = 16; i < 24; i++) { - vp9_inverse_transform_b_4x4(xd, i, 16); + vp9_inverse_transform_b_4x4(xd, xd->eobs[i], xd->block[i].dqcoeff, + xd->block[i].diff, 16); } } @@ -111,13 +114,170 @@ void vp9_inverse_transform_mb_16x16(MACROBLOCKD *xd) { vp9_inverse_transform_mbuv_8x8(xd); } -void vp9_inverse_transform_sby_32x32(SUPERBLOCKD *xd_sb) { - vp9_short_idct32x32(xd_sb->dqcoeff, xd_sb->diff, 64); +void vp9_inverse_transform_sby_32x32(MACROBLOCKD *xd) { + vp9_short_idct32x32(xd->dqcoeff, xd->diff, 64); } -void vp9_inverse_transform_sbuv_16x16(SUPERBLOCKD *xd_sb) { - vp9_inverse_transform_b_16x16(xd_sb->dqcoeff + 1024, - xd_sb->diff + 1024, 32); - vp9_inverse_transform_b_16x16(xd_sb->dqcoeff + 1280, - xd_sb->diff + 1280, 32); +void 
vp9_inverse_transform_sby_16x16(MACROBLOCKD *xd) { + int n; + + for (n = 0; n < 4; n++) { + const int x_idx = n & 1, y_idx = n >> 1; + + vp9_inverse_transform_b_16x16(xd->dqcoeff + n * 256, + xd->diff + x_idx * 16 + y_idx * 32 * 16, 64); + } +} + +void vp9_inverse_transform_sby_8x8(MACROBLOCKD *xd) { + int n; + + for (n = 0; n < 16; n++) { + const int x_idx = n & 3, y_idx = n >> 2; + + vp9_inverse_transform_b_8x8(xd->dqcoeff + n * 64, + xd->diff + x_idx * 8 + y_idx * 32 * 8, 64); + } +} + +void vp9_inverse_transform_sby_4x4(MACROBLOCKD *xd) { + int n; + + for (n = 0; n < 64; n++) { + const int x_idx = n & 7, y_idx = n >> 3; + + vp9_inverse_transform_b_4x4(xd, xd->eobs[n], xd->dqcoeff + n * 16, + xd->diff + x_idx * 4 + y_idx * 4 * 32, 64); + } +} + +void vp9_inverse_transform_sbuv_16x16(MACROBLOCKD *xd) { + vp9_inverse_transform_b_16x16(xd->dqcoeff + 1024, + xd->diff + 1024, 32); + vp9_inverse_transform_b_16x16(xd->dqcoeff + 1280, + xd->diff + 1280, 32); +} + +void vp9_inverse_transform_sbuv_8x8(MACROBLOCKD *xd) { + int n; + + for (n = 0; n < 4; n++) { + const int x_idx = n & 1, y_idx = n >> 1; + + vp9_inverse_transform_b_8x8(xd->dqcoeff + 1024 + n * 64, + xd->diff + 1024 + x_idx * 8 + y_idx * 16 * 8, + 32); + vp9_inverse_transform_b_8x8(xd->dqcoeff + 1280 + n * 64, + xd->diff + 1280 + x_idx * 8 + y_idx * 16 * 8, + 32); + } +} + +void vp9_inverse_transform_sbuv_4x4(MACROBLOCKD *xd) { + int n; + + for (n = 0; n < 16; n++) { + const int x_idx = n & 3, y_idx = n >> 2; + + vp9_inverse_transform_b_4x4(xd, xd->eobs[64 + n], + xd->dqcoeff + 1024 + n * 16, + xd->diff + 1024 + x_idx * 4 + y_idx * 16 * 4, + 32); + vp9_inverse_transform_b_4x4(xd, xd->eobs[64 + 16 + n], + xd->dqcoeff + 1280 + n * 16, + xd->diff + 1280 + x_idx * 4 + y_idx * 16 * 4, + 32); + } +} + +void vp9_inverse_transform_sb64y_32x32(MACROBLOCKD *xd) { + int n; + + for (n = 0; n < 4; n++) { + const int x_idx = n & 1, y_idx = n >> 1; + + vp9_short_idct32x32(xd->dqcoeff + n * 1024, + xd->diff + x_idx * 32 + 
y_idx * 32 * 64, 128); + } +} + +void vp9_inverse_transform_sb64y_16x16(MACROBLOCKD *xd) { + int n; + + for (n = 0; n < 16; n++) { + const int x_idx = n & 3, y_idx = n >> 2; + + vp9_inverse_transform_b_16x16(xd->dqcoeff + n * 256, + xd->diff + x_idx * 16 + y_idx * 64 * 16, 128); + } +} + +void vp9_inverse_transform_sb64y_8x8(MACROBLOCKD *xd) { + int n; + + for (n = 0; n < 64; n++) { + const int x_idx = n & 7, y_idx = n >> 3; + + vp9_inverse_transform_b_8x8(xd->dqcoeff + n * 64, + xd->diff + x_idx * 8 + y_idx * 64 * 8, 128); + } +} + +void vp9_inverse_transform_sb64y_4x4(MACROBLOCKD *xd) { + int n; + + for (n = 0; n < 256; n++) { + const int x_idx = n & 15, y_idx = n >> 4; + + vp9_inverse_transform_b_4x4(xd, xd->eobs[n], xd->dqcoeff + n * 16, + xd->diff + x_idx * 4 + y_idx * 4 * 64, 128); + } +} + +void vp9_inverse_transform_sb64uv_32x32(MACROBLOCKD *xd) { + vp9_short_idct32x32(xd->dqcoeff + 4096, + xd->diff + 4096, 64); + vp9_short_idct32x32(xd->dqcoeff + 4096 + 1024, + xd->diff + 4096 + 1024, 64); +} + +void vp9_inverse_transform_sb64uv_16x16(MACROBLOCKD *xd) { + int n; + + for (n = 0; n < 4; n++) { + const int x_idx = n & 1, y_idx = n >> 1, off = x_idx * 16 + y_idx * 32 * 16; + + vp9_inverse_transform_b_16x16(xd->dqcoeff + 4096 + n * 256, + xd->diff + 4096 + off, 64); + vp9_inverse_transform_b_16x16(xd->dqcoeff + 4096 + 1024 + n * 256, + xd->diff + 4096 + 1024 + off, 64); + } +} + +void vp9_inverse_transform_sb64uv_8x8(MACROBLOCKD *xd) { + int n; + + for (n = 0; n < 16; n++) { + const int x_idx = n & 3, y_idx = n >> 2, off = x_idx * 8 + y_idx * 32 * 8; + + vp9_inverse_transform_b_8x8(xd->dqcoeff + 4096 + n * 64, + xd->diff + 4096 + off, 64); + vp9_inverse_transform_b_8x8(xd->dqcoeff + 4096 + 1024 + n * 64, + xd->diff + 4096 + 1024 + off, 64); + } +} + +void vp9_inverse_transform_sb64uv_4x4(MACROBLOCKD *xd) { + int n; + + for (n = 0; n < 64; n++) { + const int x_idx = n & 7, y_idx = n >> 3, off = x_idx * 4 + y_idx * 32 * 4; + + vp9_inverse_transform_b_4x4(xd, 
xd->eobs[256 + n], + xd->dqcoeff + 4096 + n * 16, + xd->diff + 4096 + off, 64); + vp9_inverse_transform_b_4x4(xd, xd->eobs[256 + 64 + n], + xd->dqcoeff + 4096 + 1024 + n * 16, + xd->diff + 4096 + 1024 + off, 64); + } } diff --git a/vp9/common/vp9_invtrans.h b/vp9/common/vp9_invtrans.h index abd5b0fad..89916570d 100644 --- a/vp9/common/vp9_invtrans.h +++ b/vp9/common/vp9_invtrans.h @@ -15,7 +15,9 @@ #include "vpx/vpx_integer.h" #include "vp9/common/vp9_blockd.h" -void vp9_inverse_transform_b_4x4(MACROBLOCKD *xd, int block, int pitch); +void vp9_inverse_transform_b_4x4(MACROBLOCKD *xd, int eob, + int16_t *dqcoeff, int16_t *diff, + int pitch); void vp9_inverse_transform_mb_4x4(MACROBLOCKD *xd); @@ -39,7 +41,21 @@ void vp9_inverse_transform_mb_16x16(MACROBLOCKD *xd); void vp9_inverse_transform_mby_16x16(MACROBLOCKD *xd); -void vp9_inverse_transform_sby_32x32(SUPERBLOCKD *xd_sb); -void vp9_inverse_transform_sbuv_16x16(SUPERBLOCKD *xd_sb); +void vp9_inverse_transform_sby_32x32(MACROBLOCKD *xd); +void vp9_inverse_transform_sby_16x16(MACROBLOCKD *xd); +void vp9_inverse_transform_sby_8x8(MACROBLOCKD *xd); +void vp9_inverse_transform_sby_4x4(MACROBLOCKD *xd); +void vp9_inverse_transform_sbuv_16x16(MACROBLOCKD *xd); +void vp9_inverse_transform_sbuv_8x8(MACROBLOCKD *xd); +void vp9_inverse_transform_sbuv_4x4(MACROBLOCKD *xd); + +void vp9_inverse_transform_sb64y_32x32(MACROBLOCKD *xd); +void vp9_inverse_transform_sb64y_16x16(MACROBLOCKD *xd); +void vp9_inverse_transform_sb64y_8x8(MACROBLOCKD *xd); +void vp9_inverse_transform_sb64y_4x4(MACROBLOCKD *xd); +void vp9_inverse_transform_sb64uv_32x32(MACROBLOCKD *xd); +void vp9_inverse_transform_sb64uv_16x16(MACROBLOCKD *xd); +void vp9_inverse_transform_sb64uv_8x8(MACROBLOCKD *xd); +void vp9_inverse_transform_sb64uv_4x4(MACROBLOCKD *xd); #endif // VP9_COMMON_VP9_INVTRANS_H_ diff --git a/vp9/common/vp9_onyxc_int.h b/vp9/common/vp9_onyxc_int.h index c4bb12340..48d19a332 100644 --- a/vp9/common/vp9_onyxc_int.h +++ 
b/vp9/common/vp9_onyxc_int.h @@ -61,7 +61,7 @@ typedef struct frame_contexts { vp9_coeff_probs coef_probs_4x4[BLOCK_TYPES]; vp9_coeff_probs coef_probs_8x8[BLOCK_TYPES]; vp9_coeff_probs coef_probs_16x16[BLOCK_TYPES]; - vp9_coeff_probs coef_probs_32x32[BLOCK_TYPES_32X32]; + vp9_coeff_probs coef_probs_32x32[BLOCK_TYPES]; nmv_context nmvc; nmv_context pre_nmvc; @@ -83,12 +83,12 @@ typedef struct frame_contexts { vp9_coeff_probs pre_coef_probs_4x4[BLOCK_TYPES]; vp9_coeff_probs pre_coef_probs_8x8[BLOCK_TYPES]; vp9_coeff_probs pre_coef_probs_16x16[BLOCK_TYPES]; - vp9_coeff_probs pre_coef_probs_32x32[BLOCK_TYPES_32X32]; + vp9_coeff_probs pre_coef_probs_32x32[BLOCK_TYPES]; vp9_coeff_count coef_counts_4x4[BLOCK_TYPES]; vp9_coeff_count coef_counts_8x8[BLOCK_TYPES]; vp9_coeff_count coef_counts_16x16[BLOCK_TYPES]; - vp9_coeff_count coef_counts_32x32[BLOCK_TYPES_32X32]; + vp9_coeff_count coef_counts_32x32[BLOCK_TYPES]; nmv_context_counts NMVcount; vp9_prob switchable_interp_prob[VP9_SWITCHABLE_FILTERS + 1] diff --git a/vp9/common/vp9_recon.c b/vp9/common/vp9_recon.c index caf7b8d22..d67b6d3df 100644 --- a/vp9/common/vp9_recon.c +++ b/vp9/common/vp9_recon.c @@ -117,7 +117,7 @@ void vp9_recon_mbuv_s_c(MACROBLOCKD *xd, uint8_t *udst, uint8_t *vdst) { void vp9_recon_sby_s_c(MACROBLOCKD *xd, uint8_t *dst) { int x, y, stride = xd->block[0].dst_stride; - int16_t *diff = xd->sb_coeff_data.diff; + int16_t *diff = xd->diff; for (y = 0; y < 32; y++) { for (x = 0; x < 32; x++) { @@ -130,8 +130,8 @@ void vp9_recon_sby_s_c(MACROBLOCKD *xd, uint8_t *dst) { void vp9_recon_sbuv_s_c(MACROBLOCKD *xd, uint8_t *udst, uint8_t *vdst) { int x, y, stride = xd->block[16].dst_stride; - int16_t *udiff = xd->sb_coeff_data.diff + 1024; - int16_t *vdiff = xd->sb_coeff_data.diff + 1280; + int16_t *udiff = xd->diff + 1024; + int16_t *vdiff = xd->diff + 1280; for (y = 0; y < 16; y++) { for (x = 0; x < 16; x++) { @@ -145,6 +145,36 @@ void vp9_recon_sbuv_s_c(MACROBLOCKD *xd, uint8_t *udst, uint8_t *vdst) { } } 
+void vp9_recon_sb64y_s_c(MACROBLOCKD *xd, uint8_t *dst) { + int x, y, stride = xd->block[0].dst_stride; + int16_t *diff = xd->diff; + + for (y = 0; y < 64; y++) { + for (x = 0; x < 64; x++) { + dst[x] = clip_pixel(dst[x] + diff[x]); + } + dst += stride; + diff += 64; + } +} + +void vp9_recon_sb64uv_s_c(MACROBLOCKD *xd, uint8_t *udst, uint8_t *vdst) { + int x, y, stride = xd->block[16].dst_stride; + int16_t *udiff = xd->diff + 4096; + int16_t *vdiff = xd->diff + 4096 + 1024; + + for (y = 0; y < 32; y++) { + for (x = 0; x < 32; x++) { + udst[x] = clip_pixel(udst[x] + udiff[x]); + vdst[x] = clip_pixel(vdst[x] + vdiff[x]); + } + udst += stride; + vdst += stride; + udiff += 32; + vdiff += 32; + } +} + void vp9_recon_mby_c(MACROBLOCKD *xd) { int i; diff --git a/vp9/common/vp9_rtcd_defs.sh b/vp9/common/vp9_rtcd_defs.sh index e6dcff4d1..db1b4673a 100644 --- a/vp9/common/vp9_rtcd_defs.sh +++ b/vp9/common/vp9_rtcd_defs.sh @@ -97,6 +97,12 @@ specialize vp9_recon_sby_s prototype void vp9_recon_sbuv_s "struct macroblockd *x, uint8_t *udst, uint8_t *vdst" specialize void vp9_recon_sbuv_s +prototype void vp9_recon_sb64y_s "struct macroblockd *x, uint8_t *dst" +specialize vp9_recon_sb64y_s + +prototype void vp9_recon_sb64uv_s "struct macroblockd *x, uint8_t *udst, uint8_t *vdst" +specialize void vp9_recon_sb64uv_s + prototype void vp9_build_intra_predictors_mby_s "struct macroblockd *x" specialize vp9_build_intra_predictors_mby_s diff --git a/vp9/decoder/vp9_decodframe.c b/vp9/decoder/vp9_decodframe.c index 86806d2d0..055e97b92 100644 --- a/vp9/decoder/vp9_decodframe.c +++ b/vp9/decoder/vp9_decodframe.c @@ -452,125 +452,12 @@ static void decode_4x4(VP9D_COMP *pbi, MACROBLOCKD *xd, } } -static void decode_16x16_sb(VP9D_COMP *pbi, MACROBLOCKD *xd, - BOOL_DECODER* const bc, int n, - int maska, int shiftb) { - int x_idx = n & maska, y_idx = n >> shiftb; - TX_TYPE tx_type = get_tx_type_16x16(xd, &xd->block[0]); - if (tx_type != DCT_DCT) { - vp9_ht_dequant_idct_add_16x16_c( - tx_type, 
xd->qcoeff, xd->block[0].dequant, - xd->dst.y_buffer + y_idx * 16 * xd->dst.y_stride + x_idx * 16, - xd->dst.y_buffer + y_idx * 16 * xd->dst.y_stride + x_idx * 16, - xd->dst.y_stride, xd->dst.y_stride, xd->eobs[0]); - } else { - vp9_dequant_idct_add_16x16( - xd->qcoeff, xd->block[0].dequant, - xd->dst.y_buffer + y_idx * 16 * xd->dst.y_stride + x_idx * 16, - xd->dst.y_buffer + y_idx * 16 * xd->dst.y_stride + x_idx * 16, - xd->dst.y_stride, xd->dst.y_stride, xd->eobs[0]); - } - vp9_dequant_idct_add_uv_block_8x8_inplace_c( - xd->qcoeff + 16 * 16, - xd->block[16].dequant, - xd->dst.u_buffer + y_idx * 8 * xd->dst.uv_stride + x_idx * 8, - xd->dst.v_buffer + y_idx * 8 * xd->dst.uv_stride + x_idx * 8, - xd->dst.uv_stride, xd); -}; - -static void decode_8x8_sb(VP9D_COMP *pbi, MACROBLOCKD *xd, - BOOL_DECODER* const bc, int n, - int maska, int shiftb) { - int x_idx = n & maska, y_idx = n >> shiftb; - TX_TYPE tx_type = get_tx_type_8x8(xd, &xd->block[0]); - if (tx_type != DCT_DCT) { - int i; - for (i = 0; i < 4; i++) { - int ib = vp9_i8x8_block[i]; - int idx = (ib & 0x02) ? 
(ib + 2) : ib; - int16_t *q = xd->block[idx].qcoeff; - int16_t *dq = xd->block[0].dequant; - int stride = xd->dst.y_stride; - tx_type = get_tx_type_8x8(xd, &xd->block[ib]); - if (tx_type != DCT_DCT) { - vp9_ht_dequant_idct_add_8x8_c( - tx_type, q, dq, - xd->dst.y_buffer + (y_idx * 16 + (i / 2) * 8) * xd->dst.y_stride - + x_idx * 16 + (i & 1) * 8, - xd->dst.y_buffer + (y_idx * 16 + (i / 2) * 8) * xd->dst.y_stride - + x_idx * 16 + (i & 1) * 8, - stride, stride, xd->eobs[idx]); - } else { - vp9_dequant_idct_add_8x8_c( - q, dq, - xd->dst.y_buffer + (y_idx * 16 + (i / 2) * 8) * xd->dst.y_stride - + x_idx * 16 + (i & 1) * 8, - xd->dst.y_buffer + (y_idx * 16 + (i / 2) * 8) * xd->dst.y_stride - + x_idx * 16 + (i & 1) * 8, - stride, stride, xd->eobs[idx]); - } - } - } else { - vp9_dequant_idct_add_y_block_8x8_inplace_c( - xd->qcoeff, xd->block[0].dequant, - xd->dst.y_buffer + y_idx * 16 * xd->dst.y_stride + x_idx * 16, - xd->dst.y_stride, xd); - } - vp9_dequant_idct_add_uv_block_8x8_inplace_c( - xd->qcoeff + 16 * 16, xd->block[16].dequant, - xd->dst.u_buffer + y_idx * 8 * xd->dst.uv_stride + x_idx * 8, - xd->dst.v_buffer + y_idx * 8 * xd->dst.uv_stride + x_idx * 8, - xd->dst.uv_stride, xd); -}; - -static void decode_4x4_sb(VP9D_COMP *pbi, MACROBLOCKD *xd, - BOOL_DECODER* const bc, int n, - int maska, int shiftb) { - int x_idx = n & maska, y_idx = n >> shiftb; - TX_TYPE tx_type = get_tx_type_4x4(xd, &xd->block[0]); - if (tx_type != DCT_DCT) { - int i; - for (i = 0; i < 16; i++) { - BLOCKD *b = &xd->block[i]; - tx_type = get_tx_type_4x4(xd, b); - if (tx_type != DCT_DCT) { - vp9_ht_dequant_idct_add_c( - tx_type, b->qcoeff, b->dequant, - xd->dst.y_buffer + (y_idx * 16 + (i / 4) * 4) * xd->dst.y_stride - + x_idx * 16 + (i & 3) * 4, - xd->dst.y_buffer + (y_idx * 16 + (i / 4) * 4) * xd->dst.y_stride - + x_idx * 16 + (i & 3) * 4, - xd->dst.y_stride, xd->dst.y_stride, xd->eobs[i]); - } else { - xd->itxm_add( - b->qcoeff, b->dequant, - xd->dst.y_buffer + (y_idx * 16 + (i / 4) * 4) * 
xd->dst.y_stride - + x_idx * 16 + (i & 3) * 4, - xd->dst.y_buffer + (y_idx * 16 + (i / 4) * 4) * xd->dst.y_stride - + x_idx * 16 + (i & 3) * 4, - xd->dst.y_stride, xd->dst.y_stride, xd->eobs[i]); - } - } - } else { - vp9_dequant_idct_add_y_block_4x4_inplace_c( - xd->qcoeff, xd->block[0].dequant, - xd->dst.y_buffer + y_idx * 16 * xd->dst.y_stride + x_idx * 16, - xd->dst.y_stride, xd); - } - vp9_dequant_idct_add_uv_block_4x4_inplace_c( - xd->qcoeff + 16 * 16, xd->block[16].dequant, - xd->dst.u_buffer + y_idx * 8 * xd->dst.uv_stride + x_idx * 8, - xd->dst.v_buffer + y_idx * 8 * xd->dst.uv_stride + x_idx * 8, - xd->dst.uv_stride, xd); -}; - static void decode_superblock64(VP9D_COMP *pbi, MACROBLOCKD *xd, int mb_row, int mb_col, BOOL_DECODER* const bc) { int n, eobtotal; - TX_SIZE tx_size = xd->mode_info_context->mbmi.txfm_size; VP9_COMMON *const pc = &pbi->common; - MODE_INFO *orig_mi = xd->mode_info_context; + MODE_INFO *mi = xd->mode_info_context; const int mis = pc->mode_info_stride; assert(xd->mode_info_context->mbmi.sb_type == BLOCK_SIZE_SB64X64); @@ -583,20 +470,7 @@ static void decode_superblock64(VP9D_COMP *pbi, MACROBLOCKD *xd, mb_init_dequantizer(pbi, xd); if (xd->mode_info_context->mbmi.mb_skip_coeff) { - int n; - - vp9_reset_mb_tokens_context(xd); - for (n = 1; n <= 3; n++) { - if (mb_col < pc->mb_cols - n) - xd->above_context += n; - if (mb_row < pc->mb_rows - n) - xd->left_context += n; - vp9_reset_mb_tokens_context(xd); - if (mb_col < pc->mb_cols - n) - xd->above_context -= n; - if (mb_row < pc->mb_rows - n) - xd->left_context -= n; - } + vp9_reset_sb64_tokens_context(xd); /* Special case: Force the loopfilter to skip when eobtotal and * mb_skip_coeff are zero. 
@@ -617,83 +491,108 @@ static void decode_superblock64(VP9D_COMP *pbi, MACROBLOCKD *xd, } /* dequantization and idct */ - if (xd->mode_info_context->mbmi.txfm_size == TX_32X32) { - for (n = 0; n < 4; n++) { - const int x_idx = n & 1, y_idx = n >> 1; + eobtotal = vp9_decode_sb64_tokens(pbi, xd, bc); + if (eobtotal == 0) { // skip loopfilter + for (n = 0; n < 16; n++) { + const int x_idx = n & 3, y_idx = n >> 2; - if (mb_col + x_idx * 2 >= pc->mb_cols || - mb_row + y_idx * 2 >= pc->mb_rows) - continue; - - xd->left_context = pc->left_context + (y_idx << 1); - xd->above_context = pc->above_context + mb_col + (x_idx << 1); - xd->mode_info_context = orig_mi + x_idx * 2 + y_idx * 2 * mis; - eobtotal = vp9_decode_sb_tokens(pbi, xd, bc); - if (eobtotal == 0) { // skip loopfilter - xd->mode_info_context->mbmi.mb_skip_coeff = 1; - if (mb_col + 1 < pc->mb_cols) - xd->mode_info_context[1].mbmi.mb_skip_coeff = 1; - if (mb_row + 1 < pc->mb_rows) { - xd->mode_info_context[mis].mbmi.mb_skip_coeff = 1; - if (mb_col + 1 < pc->mb_cols) - xd->mode_info_context[mis + 1].mbmi.mb_skip_coeff = 1; - } - } else { - vp9_dequant_idct_add_32x32(xd->sb_coeff_data.qcoeff, xd->block[0].dequant, - xd->dst.y_buffer + x_idx * 32 + - xd->dst.y_stride * y_idx * 32, - xd->dst.y_buffer + x_idx * 32 + - xd->dst.y_stride * y_idx * 32, - xd->dst.y_stride, xd->dst.y_stride, - xd->eobs[0]); - vp9_dequant_idct_add_uv_block_16x16_c(xd->sb_coeff_data.qcoeff + 1024, - xd->block[16].dequant, - xd->dst.u_buffer + x_idx * 16 + - xd->dst.uv_stride * y_idx * 16, - xd->dst.v_buffer + x_idx * 16 + - xd->dst.uv_stride * y_idx * 16, - xd->dst.uv_stride, xd); - } + if (mb_col + x_idx < pc->mb_cols && mb_row + y_idx < pc->mb_rows) + mi[y_idx * mis + x_idx].mbmi.mb_skip_coeff = mi->mbmi.mb_skip_coeff; } } else { - for (n = 0; n < 16; n++) { - int x_idx = n & 3, y_idx = n >> 2; - - if (mb_col + x_idx >= pc->mb_cols || mb_row + y_idx >= pc->mb_rows) - continue; - - xd->above_context = pc->above_context + mb_col + x_idx; - 
xd->left_context = pc->left_context + y_idx; - xd->mode_info_context = orig_mi + x_idx + y_idx * mis; - - eobtotal = vp9_decode_mb_tokens(pbi, xd, bc); - if (eobtotal == 0) { // skip loopfilter - xd->mode_info_context->mbmi.mb_skip_coeff = 1; - continue; - } - - if (tx_size == TX_16X16) { - decode_16x16_sb(pbi, xd, bc, n, 3, 2); - } else if (tx_size == TX_8X8) { - decode_8x8_sb(pbi, xd, bc, n, 3, 2); - } else { - decode_4x4_sb(pbi, xd, bc, n, 3, 2); - } + switch (xd->mode_info_context->mbmi.txfm_size) { + case TX_32X32: + for (n = 0; n < 4; n++) { + const int x_idx = n & 1, y_idx = n >> 1; + vp9_dequant_idct_add_32x32(xd->qcoeff + n * 1024, + xd->block[0].dequant, + xd->dst.y_buffer + x_idx * 32 + y_idx * xd->dst.y_stride * 32, + xd->dst.y_buffer + x_idx * 32 + y_idx * xd->dst.y_stride * 32, + xd->dst.y_stride, xd->dst.y_stride, xd->eobs[n * 64]); + } + vp9_dequant_idct_add_32x32(xd->qcoeff + 4096, + xd->block[16].dequant, xd->dst.u_buffer, xd->dst.u_buffer, + xd->dst.uv_stride, xd->dst.uv_stride, xd->eobs[256]); + vp9_dequant_idct_add_32x32(xd->qcoeff + 4096 + 1024, + xd->block[20].dequant, xd->dst.v_buffer, xd->dst.v_buffer, + xd->dst.uv_stride, xd->dst.uv_stride, xd->eobs[320]); + break; + case TX_16X16: // FIXME(rbultje): adst + for (n = 0; n < 16; n++) { + const int x_idx = n & 3, y_idx = n >> 2; + vp9_dequant_idct_add_16x16(xd->qcoeff + n * 256, xd->block[0].dequant, + xd->dst.y_buffer + y_idx * 16 * xd->dst.y_stride + x_idx * 16, + xd->dst.y_buffer + y_idx * 16 * xd->dst.y_stride + x_idx * 16, + xd->dst.y_stride, xd->dst.y_stride, xd->eobs[n * 16]); + } + for (n = 0; n < 4; n++) { + const int x_idx = n & 1, y_idx = n >> 1; + vp9_dequant_idct_add_16x16(xd->qcoeff + 4096 + n * 256, + xd->block[16].dequant, + xd->dst.u_buffer + y_idx * 16 * xd->dst.uv_stride + x_idx * 16, + xd->dst.u_buffer + y_idx * 16 * xd->dst.uv_stride + x_idx * 16, + xd->dst.uv_stride, xd->dst.uv_stride, xd->eobs[256 + n * 16]); + vp9_dequant_idct_add_16x16(xd->qcoeff + 4096 + 1024 + n * 
256, + xd->block[20].dequant, + xd->dst.v_buffer + y_idx * 16 * xd->dst.uv_stride + x_idx * 16, + xd->dst.v_buffer + y_idx * 16 * xd->dst.uv_stride + x_idx * 16, + xd->dst.uv_stride, xd->dst.uv_stride, xd->eobs[320 + n * 16]); + } + break; + case TX_8X8: // FIXME(rbultje): adst + for (n = 0; n < 64; n++) { + const int x_idx = n & 7, y_idx = n >> 3; + vp9_dequant_idct_add_8x8_c(xd->qcoeff + n * 64, xd->block[0].dequant, + xd->dst.y_buffer + y_idx * 8 * xd->dst.y_stride + x_idx * 8, + xd->dst.y_buffer + y_idx * 8 * xd->dst.y_stride + x_idx * 8, + xd->dst.y_stride, xd->dst.y_stride, xd->eobs[n * 4]); + } + for (n = 0; n < 16; n++) { + const int x_idx = n & 3, y_idx = n >> 2; + vp9_dequant_idct_add_8x8_c(xd->qcoeff + n * 64 + 4096, + xd->block[16].dequant, + xd->dst.u_buffer + y_idx * 8 * xd->dst.uv_stride + x_idx * 8, + xd->dst.u_buffer + y_idx * 8 * xd->dst.uv_stride + x_idx * 8, + xd->dst.uv_stride, xd->dst.uv_stride, xd->eobs[256 + n * 4]); + vp9_dequant_idct_add_8x8_c(xd->qcoeff + n * 64 + 4096 + 1024, + xd->block[20].dequant, + xd->dst.v_buffer + y_idx * 8 * xd->dst.uv_stride + x_idx * 8, + xd->dst.v_buffer + y_idx * 8 * xd->dst.uv_stride + x_idx * 8, + xd->dst.uv_stride, xd->dst.uv_stride, xd->eobs[320 + n * 4]); + } + break; + case TX_4X4: // FIXME(rbultje): adst + for (n = 0; n < 256; n++) { + const int x_idx = n & 15, y_idx = n >> 4; + xd->itxm_add(xd->qcoeff + n * 16, xd->block[0].dequant, + xd->dst.y_buffer + y_idx * 4 * xd->dst.y_stride + x_idx * 4, + xd->dst.y_buffer + y_idx * 4 * xd->dst.y_stride + x_idx * 4, + xd->dst.y_stride, xd->dst.y_stride, xd->eobs[n]); + } + for (n = 0; n < 64; n++) { + const int x_idx = n & 7, y_idx = n >> 3; + xd->itxm_add(xd->qcoeff + 4096 + n * 16, + xd->block[16].dequant, + xd->dst.u_buffer + y_idx * 4 * xd->dst.uv_stride + x_idx * 4, + xd->dst.u_buffer + y_idx * 4 * xd->dst.uv_stride + x_idx * 4, + xd->dst.uv_stride, xd->dst.uv_stride, xd->eobs[256 + n]); + xd->itxm_add(xd->qcoeff + 4096 + 1024 + n * 16, + 
xd->block[20].dequant, + xd->dst.v_buffer + y_idx * 4 * xd->dst.uv_stride + x_idx * 4, + xd->dst.v_buffer + y_idx * 4 * xd->dst.uv_stride + x_idx * 4, + xd->dst.uv_stride, xd->dst.uv_stride, xd->eobs[320 + n]); + } + break; + default: assert(0); } } - - xd->above_context = pc->above_context + mb_col; - xd->left_context = pc->left_context; - xd->mode_info_context = orig_mi; } static void decode_superblock32(VP9D_COMP *pbi, MACROBLOCKD *xd, int mb_row, int mb_col, BOOL_DECODER* const bc) { int n, eobtotal; - TX_SIZE tx_size = xd->mode_info_context->mbmi.txfm_size; VP9_COMMON *const pc = &pbi->common; - MODE_INFO *orig_mi = xd->mode_info_context; const int mis = pc->mode_info_stride; assert(xd->mode_info_context->mbmi.sb_type == BLOCK_SIZE_SB32X32); @@ -706,16 +605,7 @@ static void decode_superblock32(VP9D_COMP *pbi, MACROBLOCKD *xd, mb_init_dequantizer(pbi, xd); if (xd->mode_info_context->mbmi.mb_skip_coeff) { - vp9_reset_mb_tokens_context(xd); - if (mb_col < pc->mb_cols - 1) - xd->above_context++; - if (mb_row < pc->mb_rows - 1) - xd->left_context++; - vp9_reset_mb_tokens_context(xd); - if (mb_col < pc->mb_cols - 1) - xd->above_context--; - if (mb_row < pc->mb_rows - 1) - xd->left_context--; + vp9_reset_sb_tokens_context(xd); /* Special case: Force the loopfilter to skip when eobtotal and * mb_skip_coeff are zero. 
@@ -736,56 +626,90 @@ static void decode_superblock32(VP9D_COMP *pbi, MACROBLOCKD *xd, } /* dequantization and idct */ - if (xd->mode_info_context->mbmi.txfm_size == TX_32X32) { - eobtotal = vp9_decode_sb_tokens(pbi, xd, bc); - if (eobtotal == 0) { // skip loopfilter - xd->mode_info_context->mbmi.mb_skip_coeff = 1; + eobtotal = vp9_decode_sb_tokens(pbi, xd, bc); + if (eobtotal == 0) { // skip loopfilter + xd->mode_info_context->mbmi.mb_skip_coeff = 1; + if (mb_col + 1 < pc->mb_cols) + xd->mode_info_context[1].mbmi.mb_skip_coeff = 1; + if (mb_row + 1 < pc->mb_rows) { + xd->mode_info_context[mis].mbmi.mb_skip_coeff = 1; if (mb_col + 1 < pc->mb_cols) - xd->mode_info_context[1].mbmi.mb_skip_coeff = 1; - if (mb_row + 1 < pc->mb_rows) { - xd->mode_info_context[mis].mbmi.mb_skip_coeff = 1; - if (mb_col + 1 < pc->mb_cols) - xd->mode_info_context[mis + 1].mbmi.mb_skip_coeff = 1; - } - } else { - vp9_dequant_idct_add_32x32(xd->sb_coeff_data.qcoeff, xd->block[0].dequant, - xd->dst.y_buffer, xd->dst.y_buffer, - xd->dst.y_stride, xd->dst.y_stride, - xd->eobs[0]); - vp9_dequant_idct_add_uv_block_16x16_c(xd->sb_coeff_data.qcoeff + 1024, - xd->block[16].dequant, - xd->dst.u_buffer, xd->dst.v_buffer, - xd->dst.uv_stride, xd); + xd->mode_info_context[mis + 1].mbmi.mb_skip_coeff = 1; } } else { - for (n = 0; n < 4; n++) { - int x_idx = n & 1, y_idx = n >> 1; - - if (mb_col + x_idx >= pc->mb_cols || mb_row + y_idx >= pc->mb_rows) - continue; - - xd->above_context = pc->above_context + mb_col + x_idx; - xd->left_context = pc->left_context + y_idx + (mb_row & 2); - xd->mode_info_context = orig_mi + x_idx + y_idx * mis; - - eobtotal = vp9_decode_mb_tokens(pbi, xd, bc); - if (eobtotal == 0) { // skip loopfilter - xd->mode_info_context->mbmi.mb_skip_coeff = 1; - continue; - } - - if (tx_size == TX_16X16) { - decode_16x16_sb(pbi, xd, bc, n, 1, 1); - } else if (tx_size == TX_8X8) { - decode_8x8_sb(pbi, xd, bc, n, 1, 1); - } else { - decode_4x4_sb(pbi, xd, bc, n, 1, 1); - } + switch 
(xd->mode_info_context->mbmi.txfm_size) { + case TX_32X32: + vp9_dequant_idct_add_32x32(xd->qcoeff, xd->block[0].dequant, + xd->dst.y_buffer, xd->dst.y_buffer, + xd->dst.y_stride, xd->dst.y_stride, + xd->eobs[0]); + vp9_dequant_idct_add_uv_block_16x16_c(xd->qcoeff + 1024, + xd->block[16].dequant, + xd->dst.u_buffer, + xd->dst.v_buffer, + xd->dst.uv_stride, xd); + break; + case TX_16X16: // FIXME(rbultje): adst + for (n = 0; n < 4; n++) { + const int x_idx = n & 1, y_idx = n >> 1; + vp9_dequant_idct_add_16x16( + xd->qcoeff + n * 256, xd->block[0].dequant, + xd->dst.y_buffer + y_idx * 16 * xd->dst.y_stride + x_idx * 16, + xd->dst.y_buffer + y_idx * 16 * xd->dst.y_stride + x_idx * 16, + xd->dst.y_stride, xd->dst.y_stride, xd->eobs[n * 16]); + } + vp9_dequant_idct_add_uv_block_16x16_c(xd->qcoeff + 1024, + xd->block[16].dequant, + xd->dst.u_buffer, + xd->dst.v_buffer, + xd->dst.uv_stride, xd); + break; + case TX_8X8: // FIXME(rbultje): adst + for (n = 0; n < 16; n++) { + const int x_idx = n & 3, y_idx = n >> 2; + vp9_dequant_idct_add_8x8_c(xd->qcoeff + n * 64, xd->block[0].dequant, + xd->dst.y_buffer + y_idx * 8 * xd->dst.y_stride + x_idx * 8, + xd->dst.y_buffer + y_idx * 8 * xd->dst.y_stride + x_idx * 8, + xd->dst.y_stride, xd->dst.y_stride, xd->eobs[n * 4]); + } + for (n = 0; n < 4; n++) { + const int x_idx = n & 1, y_idx = n >> 1; + vp9_dequant_idct_add_8x8_c(xd->qcoeff + n * 64 + 1024, + xd->block[16].dequant, + xd->dst.u_buffer + y_idx * 8 * xd->dst.uv_stride + x_idx * 8, + xd->dst.u_buffer + y_idx * 8 * xd->dst.uv_stride + x_idx * 8, + xd->dst.uv_stride, xd->dst.uv_stride, xd->eobs[64 + n * 4]); + vp9_dequant_idct_add_8x8_c(xd->qcoeff + n * 64 + 1280, + xd->block[20].dequant, + xd->dst.v_buffer + y_idx * 8 * xd->dst.uv_stride + x_idx * 8, + xd->dst.v_buffer + y_idx * 8 * xd->dst.uv_stride + x_idx * 8, + xd->dst.uv_stride, xd->dst.uv_stride, xd->eobs[80 + n * 4]); + } + break; + case TX_4X4: // FIXME(rbultje): adst + for (n = 0; n < 64; n++) { + const int x_idx = n 
& 7, y_idx = n >> 3; + xd->itxm_add(xd->qcoeff + n * 16, xd->block[0].dequant, + xd->dst.y_buffer + y_idx * 4 * xd->dst.y_stride + x_idx * 4, + xd->dst.y_buffer + y_idx * 4 * xd->dst.y_stride + x_idx * 4, + xd->dst.y_stride, xd->dst.y_stride, xd->eobs[n]); + } + for (n = 0; n < 16; n++) { + const int x_idx = n & 3, y_idx = n >> 2; + xd->itxm_add(xd->qcoeff + 1024 + n * 16, + xd->block[16].dequant, + xd->dst.u_buffer + y_idx * 4 * xd->dst.uv_stride + x_idx * 4, + xd->dst.u_buffer + y_idx * 4 * xd->dst.uv_stride + x_idx * 4, + xd->dst.uv_stride, xd->dst.uv_stride, xd->eobs[64 + n]); + xd->itxm_add(xd->qcoeff + 1280 + n * 16, + xd->block[20].dequant, + xd->dst.v_buffer + y_idx * 4 * xd->dst.uv_stride + x_idx * 4, + xd->dst.v_buffer + y_idx * 4 * xd->dst.uv_stride + x_idx * 4, + xd->dst.uv_stride, xd->dst.uv_stride, xd->eobs[80 + n]); + } + break; + default: assert(0); } - - xd->above_context = pc->above_context + mb_col; - xd->left_context = pc->left_context + (mb_row & 2); - xd->mode_info_context = orig_mi; } } @@ -1187,7 +1111,7 @@ static void read_coef_probs(VP9D_COMP *pbi, BOOL_DECODER* const bc) { read_coef_probs_common(bc, pc->fc.coef_probs_16x16, BLOCK_TYPES); } if (pbi->common.txfm_mode > ALLOW_16X16) { - read_coef_probs_common(bc, pc->fc.coef_probs_32x32, BLOCK_TYPES_32X32); + read_coef_probs_common(bc, pc->fc.coef_probs_32x32, BLOCK_TYPES); } } diff --git a/vp9/decoder/vp9_dequantize.c b/vp9/decoder/vp9_dequantize.c index 5a98b1150..85246d830 100644 --- a/vp9/decoder/vp9_dequantize.c +++ b/vp9/decoder/vp9_dequantize.c @@ -354,7 +354,7 @@ void vp9_dequant_idct_add_uv_block_16x16_c(int16_t *q, const int16_t *dq, int stride, MACROBLOCKD *xd) { vp9_dequant_idct_add_16x16_c(q, dq, dstu, dstu, stride, stride, - xd->eobs[16]); + xd->eobs[64]); vp9_dequant_idct_add_16x16_c(q + 256, dq, dstv, dstv, stride, stride, - xd->eobs[20]); + xd->eobs[80]); } diff --git a/vp9/decoder/vp9_detokenize.c b/vp9/decoder/vp9_detokenize.c index d3fb25ace..a192266ef 100644 --- 
a/vp9/decoder/vp9_detokenize.c +++ b/vp9/decoder/vp9_detokenize.c @@ -90,9 +90,8 @@ static int decode_coefs(VP9D_COMP *dx, const MACROBLOCKD *xd, const int *const scan, TX_SIZE txfm_size) { ENTROPY_CONTEXT* const A0 = (ENTROPY_CONTEXT *) xd->above_context; ENTROPY_CONTEXT* const L0 = (ENTROPY_CONTEXT *) xd->left_context; - const int aidx = vp9_block2above[txfm_size][block_idx]; - const int lidx = vp9_block2left[txfm_size][block_idx]; - ENTROPY_CONTEXT above_ec = A0[aidx] != 0, left_ec = L0[lidx] != 0; + int aidx, lidx; + ENTROPY_CONTEXT above_ec, left_ec; FRAME_CONTEXT *const fc = &dx->common.fc; int recent_energy = 0; int pt, c = 0; @@ -101,9 +100,22 @@ static int decode_coefs(VP9D_COMP *dx, const MACROBLOCKD *xd, vp9_coeff_count *coef_counts; const int ref = xd->mode_info_context->mbmi.ref_frame != INTRA_FRAME; + if (xd->mode_info_context->mbmi.sb_type == BLOCK_SIZE_SB64X64) { + aidx = vp9_block2above_sb64[txfm_size][block_idx]; + lidx = vp9_block2left_sb64[txfm_size][block_idx]; + } else if (xd->mode_info_context->mbmi.sb_type == BLOCK_SIZE_SB32X32) { + aidx = vp9_block2above_sb[txfm_size][block_idx]; + lidx = vp9_block2left_sb[txfm_size][block_idx]; + } else { + aidx = vp9_block2above[txfm_size][block_idx]; + lidx = vp9_block2left[txfm_size][block_idx]; + } + switch (txfm_size) { default: case TX_4X4: + above_ec = A0[aidx] != 0; + left_ec = L0[lidx] != 0; coef_probs = fc->coef_probs_4x4; coef_counts = fc->coef_counts_4x4; break; @@ -240,7 +252,7 @@ SKIP_START: if (type == PLANE_TYPE_UV) { ENTROPY_CONTEXT *A1 = (ENTROPY_CONTEXT *) (xd->above_context + 1); ENTROPY_CONTEXT *L1 = (ENTROPY_CONTEXT *) (xd->left_context + 1); - A1[aidx] = A1[aidx + 1] = L1[aidx] = L1[lidx + 1] = A0[aidx]; + A1[aidx] = A1[aidx + 1] = L1[lidx] = L1[lidx + 1] = A0[aidx]; if (txfm_size >= TX_32X32) { ENTROPY_CONTEXT *A2 = (ENTROPY_CONTEXT *) (xd->above_context + 2); ENTROPY_CONTEXT *L2 = (ENTROPY_CONTEXT *) (xd->left_context + 2); @@ -272,24 +284,181 @@ int vp9_decode_sb_tokens(VP9D_COMP* 
const pbi, MACROBLOCKD* const xd, BOOL_DECODER* const bc) { const int segment_id = xd->mode_info_context->mbmi.segment_id; - int i, eobtotal = 0, seg_eob; + int i, eobtotal = 0, seg_eob, c; - // Luma block - int c = decode_coefs(pbi, xd, bc, 0, PLANE_TYPE_Y_WITH_DC, + switch (xd->mode_info_context->mbmi.txfm_size) { + case TX_32X32: + // Luma block + c = decode_coefs(pbi, xd, bc, 0, PLANE_TYPE_Y_WITH_DC, DCT_DCT, get_eob(xd, segment_id, 1024), - xd->sb_coeff_data.qcoeff, - vp9_default_zig_zag1d_32x32, TX_32X32); - xd->eobs[0] = c; - eobtotal += c; + xd->qcoeff, vp9_default_zig_zag1d_32x32, TX_32X32); + xd->eobs[0] = c; + eobtotal += c; - // 16x16 chroma blocks - seg_eob = get_eob(xd, segment_id, 256); - for (i = 16; i < 24; i += 4) { - c = decode_coefs(pbi, xd, bc, i, PLANE_TYPE_UV, DCT_DCT, seg_eob, - xd->sb_coeff_data.qcoeff + 1024 + (i - 16) * 64, - vp9_default_zig_zag1d_16x16, TX_16X16); - xd->eobs[i] = c; - eobtotal += c; + // 16x16 chroma blocks + seg_eob = get_eob(xd, segment_id, 256); + for (i = 64; i < 96; i += 16) { + c = decode_coefs(pbi, xd, bc, i, PLANE_TYPE_UV, DCT_DCT, seg_eob, + xd->qcoeff + i * 16, + vp9_default_zig_zag1d_16x16, TX_16X16); + xd->eobs[i] = c; + eobtotal += c; + } + break; + case TX_16X16: + // 16x16 luma blocks + seg_eob = get_eob(xd, segment_id, 256); + for (i = 0; i < 64; i += 16) { + c = decode_coefs(pbi, xd, bc, i, PLANE_TYPE_Y_WITH_DC, + DCT_DCT, seg_eob, xd->qcoeff + i * 16, + vp9_default_zig_zag1d_16x16, TX_16X16); + xd->eobs[i] = c; + eobtotal += c; + } + + // 16x16 chroma blocks + for (i = 64; i < 96; i += 16) { + c = decode_coefs(pbi, xd, bc, i, PLANE_TYPE_UV, DCT_DCT, seg_eob, + xd->qcoeff + i * 16, + vp9_default_zig_zag1d_16x16, TX_16X16); + xd->eobs[i] = c; + eobtotal += c; + } + break; + case TX_8X8: + // 8x8 luma blocks + seg_eob = get_eob(xd, segment_id, 64); + for (i = 0; i < 64; i += 4) { + c = decode_coefs(pbi, xd, bc, i, PLANE_TYPE_Y_WITH_DC, + DCT_DCT, seg_eob, xd->qcoeff + i * 16, + vp9_default_zig_zag1d_8x8, 
TX_8X8); + xd->eobs[i] = c; + eobtotal += c; + } + + // 8x8 chroma blocks + for (i = 64; i < 96; i += 4) { + c = decode_coefs(pbi, xd, bc, i, PLANE_TYPE_UV, DCT_DCT, seg_eob, + xd->qcoeff + i * 16, + vp9_default_zig_zag1d_8x8, TX_8X8); + xd->eobs[i] = c; + eobtotal += c; + } + break; + case TX_4X4: + // 4x4 luma blocks + seg_eob = get_eob(xd, segment_id, 16); + for (i = 0; i < 64; i++) { + c = decode_coefs(pbi, xd, bc, i, PLANE_TYPE_Y_WITH_DC, + DCT_DCT, seg_eob, xd->qcoeff + i * 16, + vp9_default_zig_zag1d_4x4, TX_4X4); + xd->eobs[i] = c; + eobtotal += c; + } + + // 4x4 chroma blocks + for (i = 64; i < 96; i++) { + c = decode_coefs(pbi, xd, bc, i, PLANE_TYPE_UV, DCT_DCT, seg_eob, + xd->qcoeff + i * 16, + vp9_default_zig_zag1d_4x4, TX_4X4); + xd->eobs[i] = c; + eobtotal += c; + } + break; + default: assert(0); + } + + return eobtotal; +} + +int vp9_decode_sb64_tokens(VP9D_COMP* const pbi, + MACROBLOCKD* const xd, + BOOL_DECODER* const bc) { + const int segment_id = xd->mode_info_context->mbmi.segment_id; + int i, eobtotal = 0, seg_eob, c; + + switch (xd->mode_info_context->mbmi.txfm_size) { + case TX_32X32: + // Luma block + seg_eob = get_eob(xd, segment_id, 1024); + for (i = 0; i < 256; i += 64) { + c = decode_coefs(pbi, xd, bc, i, PLANE_TYPE_Y_WITH_DC, + DCT_DCT, seg_eob, xd->qcoeff + i * 16, + vp9_default_zig_zag1d_32x32, TX_32X32); + xd->eobs[i] = c; + eobtotal += c; + } + + // 32x32 chroma blocks + for (i = 256; i < 384; i += 64) { + c = decode_coefs(pbi, xd, bc, i, PLANE_TYPE_UV, DCT_DCT, seg_eob, + xd->qcoeff + i * 16, + vp9_default_zig_zag1d_32x32, TX_32X32); + xd->eobs[i] = c; + eobtotal += c; + } + break; + case TX_16X16: + // 16x16 luma blocks + seg_eob = get_eob(xd, segment_id, 256); + for (i = 0; i < 256; i += 16) { + c = decode_coefs(pbi, xd, bc, i, PLANE_TYPE_Y_WITH_DC, + DCT_DCT, seg_eob, xd->qcoeff + i * 16, + vp9_default_zig_zag1d_16x16, TX_16X16); + xd->eobs[i] = c; + eobtotal += c; + } + + // 16x16 chroma blocks + for (i = 256; i < 384; i += 16) 
{ + c = decode_coefs(pbi, xd, bc, i, PLANE_TYPE_UV, DCT_DCT, seg_eob, + xd->qcoeff + i * 16, + vp9_default_zig_zag1d_16x16, TX_16X16); + xd->eobs[i] = c; + eobtotal += c; + } + break; + case TX_8X8: + // 8x8 luma blocks + seg_eob = get_eob(xd, segment_id, 64); + for (i = 0; i < 256; i += 4) { + c = decode_coefs(pbi, xd, bc, i, PLANE_TYPE_Y_WITH_DC, + DCT_DCT, seg_eob, xd->qcoeff + i * 16, + vp9_default_zig_zag1d_8x8, TX_8X8); + xd->eobs[i] = c; + eobtotal += c; + } + + // 8x8 chroma blocks + for (i = 256; i < 384; i += 4) { + c = decode_coefs(pbi, xd, bc, i, PLANE_TYPE_UV, DCT_DCT, seg_eob, + xd->qcoeff + i * 16, + vp9_default_zig_zag1d_8x8, TX_8X8); + xd->eobs[i] = c; + eobtotal += c; + } + break; + case TX_4X4: + // 4x4 luma blocks + seg_eob = get_eob(xd, segment_id, 16); + for (i = 0; i < 256; i++) { + c = decode_coefs(pbi, xd, bc, i, PLANE_TYPE_Y_WITH_DC, + DCT_DCT, seg_eob, xd->qcoeff + i * 16, + vp9_default_zig_zag1d_4x4, TX_4X4); + xd->eobs[i] = c; + eobtotal += c; + } + + // 4x4 chroma blocks + for (i = 256; i < 384; i++) { + c = decode_coefs(pbi, xd, bc, i, PLANE_TYPE_UV, DCT_DCT, seg_eob, + xd->qcoeff + i * 16, + vp9_default_zig_zag1d_4x4, TX_4X4); + xd->eobs[i] = c; + eobtotal += c; + } + break; + default: assert(0); } return eobtotal; diff --git a/vp9/decoder/vp9_detokenize.h b/vp9/decoder/vp9_detokenize.h index 926a0661f..33a34aeae 100644 --- a/vp9/decoder/vp9_detokenize.h +++ b/vp9/decoder/vp9_detokenize.h @@ -14,8 +14,6 @@ #include "vp9/decoder/vp9_onyxd_int.h" -void vp9_reset_mb_tokens_context(MACROBLOCKD* const); - int vp9_decode_coefs_4x4(VP9D_COMP *dx, MACROBLOCKD *xd, BOOL_DECODER* const bc, PLANE_TYPE type, int i); @@ -27,6 +25,10 @@ int vp9_decode_sb_tokens(VP9D_COMP* const pbi, MACROBLOCKD* const xd, BOOL_DECODER* const bc); +int vp9_decode_sb64_tokens(VP9D_COMP* const pbi, + MACROBLOCKD* const xd, + BOOL_DECODER* const bc); + int vp9_decode_mb_tokens_4x4_uv(VP9D_COMP* const dx, MACROBLOCKD* const xd, BOOL_DECODER* const bc); diff --git 
a/vp9/encoder/vp9_bitstream.c b/vp9/encoder/vp9_bitstream.c index 7101947a6..971da0509 100644 --- a/vp9/encoder/vp9_bitstream.c +++ b/vp9/encoder/vp9_bitstream.c @@ -45,7 +45,7 @@ int intra_mode_stats[VP9_KF_BINTRAMODES] vp9_coeff_stats tree_update_hist_4x4[BLOCK_TYPES]; vp9_coeff_stats tree_update_hist_8x8[BLOCK_TYPES]; vp9_coeff_stats tree_update_hist_16x16[BLOCK_TYPES]; -vp9_coeff_stats tree_update_hist_32x32[BLOCK_TYPES_32X32]; +vp9_coeff_stats tree_update_hist_32x32[BLOCK_TYPES]; extern unsigned int active_section; #endif @@ -1229,7 +1229,7 @@ static void build_coeff_contexts(VP9_COMP *cpi) { #ifdef ENTROPY_STATS cpi, context_counters_32x32, #endif - cpi->frame_branch_ct_32x32, BLOCK_TYPES_32X32); + cpi->frame_branch_ct_32x32, BLOCK_TYPES); } static void update_coef_probs_common(vp9_writer* const bc, @@ -1388,7 +1388,7 @@ static void update_coef_probs(VP9_COMP* const cpi, vp9_writer* const bc) { cpi->frame_coef_probs_32x32, cpi->common.fc.coef_probs_32x32, cpi->frame_branch_ct_32x32, - BLOCK_TYPES_32X32); + BLOCK_TYPES); } } @@ -2103,13 +2103,13 @@ void print_tree_update_probs() { fprintf(f, "\n/* Update probabilities for token entropy tree. 
*/\n\n"); print_tree_update_for_type(f, tree_update_hist_4x4, BLOCK_TYPES, - "vp9_coef_update_probs_4x4[BLOCK_TYPES_4X4]"); + "vp9_coef_update_probs_4x4[BLOCK_TYPES]"); print_tree_update_for_type(f, tree_update_hist_8x8, BLOCK_TYPES, - "vp9_coef_update_probs_8x8[BLOCK_TYPES_8X8]"); + "vp9_coef_update_probs_8x8[BLOCK_TYPES]"); print_tree_update_for_type(f, tree_update_hist_16x16, BLOCK_TYPES, - "vp9_coef_update_probs_16x16[BLOCK_TYPES_16X16]"); - print_tree_update_for_type(f, tree_update_hist_32x32, BLOCK_TYPES_32X32, - "vp9_coef_update_probs_32x32[BLOCK_TYPES_32X32]"); + "vp9_coef_update_probs_16x16[BLOCK_TYPES]"); + print_tree_update_for_type(f, tree_update_hist_32x32, BLOCK_TYPES, + "vp9_coef_update_probs_32x32[BLOCK_TYPES]"); fclose(f); f = fopen("treeupdate.bin", "wb"); diff --git a/vp9/encoder/vp9_block.h b/vp9/encoder/vp9_block.h index 79a021cfb..560c37171 100644 --- a/vp9/encoder/vp9_block.h +++ b/vp9/encoder/vp9_block.h @@ -83,20 +83,13 @@ typedef struct { int64_t txfm_rd_diff[NB_TXFM_MODES]; } PICK_MODE_CONTEXT; -typedef struct superblock { - DECLARE_ALIGNED(16, int16_t, src_diff[32*32+16*16*2]); - DECLARE_ALIGNED(16, int16_t, coeff[32*32+16*16*2]); -} SUPERBLOCK; - typedef struct macroblock MACROBLOCK; struct macroblock { - DECLARE_ALIGNED(16, int16_t, src_diff[384]); // 16x16 Y 8x8 U 8x8 V - DECLARE_ALIGNED(16, int16_t, coeff[384]); // 16x16 Y 8x8 U 8x8 V + DECLARE_ALIGNED(16, int16_t, src_diff[64*64+32*32*2]); + DECLARE_ALIGNED(16, int16_t, coeff[64*64+32*32*2]); // 16 Y blocks, 4 U blocks, 4 V blocks, BLOCK block[24]; - SUPERBLOCK sb_coeff_data; - YV12_BUFFER_CONFIG src; MACROBLOCKD e_mbd; diff --git a/vp9/encoder/vp9_encodeframe.c b/vp9/encoder/vp9_encodeframe.c index 5271a597c..3b48f46c0 100644 --- a/vp9/encoder/vp9_encodeframe.c +++ b/vp9/encoder/vp9_encodeframe.c @@ -1820,63 +1820,6 @@ static void adjust_act_zbin(VP9_COMP *cpi, MACROBLOCK *x) { #endif } -static void update_sb_skip_coeff_state(VP9_COMP *cpi, - ENTROPY_CONTEXT_PLANES ta[4], - 
ENTROPY_CONTEXT_PLANES tl[4], - TOKENEXTRA *t[4], - TOKENEXTRA **tp, - int skip[4], int output_enabled) { - MACROBLOCK *const x = &cpi->mb; - TOKENEXTRA tokens[4][16 * 25]; - int n_tokens[4], n; - - // if there were no skips, we don't need to do anything - if (!skip[0] && !skip[1] && !skip[2] && !skip[3]) - return; - - // if we don't do coeff skipping for this frame, we don't - // need to do anything here - if (!cpi->common.mb_no_coeff_skip) - return; - - // if all 4 MBs skipped coeff coding, nothing to be done - if (skip[0] && skip[1] && skip[2] && skip[3]) - return; - - // so the situation now is that we want to skip coeffs - // for some MBs, but not all, and we didn't code EOB - // coefficients for them. However, the skip flag for this - // SB will be 0 overall, so we need to insert EOBs in the - // middle of the token tree. Do so here. - n_tokens[0] = t[1] - t[0]; - n_tokens[1] = t[2] - t[1]; - n_tokens[2] = t[3] - t[2]; - n_tokens[3] = *tp - t[3]; - if (n_tokens[0]) - memcpy(tokens[0], t[0], n_tokens[0] * sizeof(*t[0])); - if (n_tokens[1]) - memcpy(tokens[1], t[1], n_tokens[1] * sizeof(*t[0])); - if (n_tokens[2]) - memcpy(tokens[2], t[2], n_tokens[2] * sizeof(*t[0])); - if (n_tokens[3]) - memcpy(tokens[3], t[3], n_tokens[3] * sizeof(*t[0])); - - // reset pointer, stuff EOBs where necessary - *tp = t[0]; - for (n = 0; n < 4; n++) { - if (skip[n]) { - x->e_mbd.above_context = &ta[n]; - x->e_mbd.left_context = &tl[n]; - vp9_stuff_mb(cpi, &x->e_mbd, tp, !output_enabled); - } else { - if (n_tokens[n]) { - memcpy(*tp, tokens[n], sizeof(*t[0]) * n_tokens[n]); - } - (*tp) += n_tokens[n]; - } - } -} - static void update_sb64_skip_coeff_state(VP9_COMP *cpi, ENTROPY_CONTEXT_PLANES ta[16], ENTROPY_CONTEXT_PLANES tl[16], @@ -1994,7 +1937,9 @@ static void encode_macroblock(VP9_COMP *cpi, TOKENEXTRA **t, VP9_COMMON *const cm = &cpi->common; MACROBLOCK *const x = &cpi->mb; MACROBLOCKD *const xd = &x->e_mbd; - MB_MODE_INFO *const mbmi = &xd->mode_info_context->mbmi; + 
MODE_INFO *mi = xd->mode_info_context; + MB_MODE_INFO *const mbmi = &mi->mbmi; + const int mis = cm->mode_info_stride; unsigned char ref_pred_flag; assert(!xd->mode_info_context->mbmi.sb_type); @@ -2190,12 +2135,11 @@ static void encode_macroblock(VP9_COMP *cpi, TOKENEXTRA **t, vp9_tokenize_mb(cpi, xd, t, !output_enabled); } else { - int mb_skip_context = - cpi->common.mb_no_coeff_skip ? - (x->e_mbd.mode_info_context - 1)->mbmi.mb_skip_coeff + - (x->e_mbd.mode_info_context - cpi->common.mode_info_stride)->mbmi.mb_skip_coeff : - 0; - if (cpi->common.mb_no_coeff_skip) { + // FIXME(rbultje): not tile-aware (mi - 1) + int mb_skip_context = cpi->common.mb_no_coeff_skip ? + (mi - 1)->mbmi.mb_skip_coeff + (mi - mis)->mbmi.mb_skip_coeff : 0; + + if (cm->mb_no_coeff_skip) { mbmi->mb_skip_coeff = 1; if (output_enabled) cpi->skip_true_count[mb_skip_context]++; @@ -2250,12 +2194,8 @@ static void encode_superblock32(VP9_COMP *cpi, TOKENEXTRA **t, int src_y_stride = x->src.y_stride, dst_y_stride = xd->dst.y_stride; int src_uv_stride = x->src.uv_stride, dst_uv_stride = xd->dst.uv_stride; unsigned char ref_pred_flag; - int n; - TOKENEXTRA *tp[4]; - int skip[4]; MODE_INFO *mi = x->e_mbd.mode_info_context; unsigned int segment_id = mi->mbmi.segment_id; - ENTROPY_CONTEXT_PLANES ta[4], tl[4]; const int mis = cm->mode_info_stride; if (cm->frame_type == KEY_FRAME) { @@ -2342,118 +2282,101 @@ static void encode_superblock32(VP9_COMP *cpi, TOKENEXTRA **t, mb_row, mb_col); } - if (xd->mode_info_context->mbmi.txfm_size == TX_32X32) { - if (!x->skip) { - vp9_subtract_sby_s_c(x->sb_coeff_data.src_diff, src, src_y_stride, - dst, dst_y_stride); - vp9_subtract_sbuv_s_c(x->sb_coeff_data.src_diff, - usrc, vsrc, src_uv_stride, - udst, vdst, dst_uv_stride); - vp9_transform_sby_32x32(x); - vp9_transform_sbuv_16x16(x); - vp9_quantize_sby_32x32(x); - vp9_quantize_sbuv_16x16(x); - // TODO(rbultje): trellis optimize - vp9_inverse_transform_sbuv_16x16(&x->e_mbd.sb_coeff_data); - 
vp9_inverse_transform_sby_32x32(&x->e_mbd.sb_coeff_data); - vp9_recon_sby_s_c(&x->e_mbd, dst); - vp9_recon_sbuv_s_c(&x->e_mbd, udst, vdst); - - vp9_tokenize_sb(cpi, &x->e_mbd, t, !output_enabled); - } else { - int mb_skip_context = - cpi->common.mb_no_coeff_skip ? - (mi - 1)->mbmi.mb_skip_coeff + - (mi - mis)->mbmi.mb_skip_coeff : - 0; - mi->mbmi.mb_skip_coeff = 1; - if (cm->mb_no_coeff_skip) { - if (output_enabled) - cpi->skip_true_count[mb_skip_context]++; - vp9_fix_contexts_sb(xd); - } else { - vp9_stuff_sb(cpi, xd, t, !output_enabled); - if (output_enabled) - cpi->skip_false_count[mb_skip_context]++; - } - } - - // copy skip flag on all mb_mode_info contexts in this SB - // if this was a skip at this txfm size - if (mb_col < cm->mb_cols - 1) - mi[1].mbmi.mb_skip_coeff = mi->mbmi.mb_skip_coeff; - if (mb_row < cm->mb_rows - 1) { - mi[mis].mbmi.mb_skip_coeff = mi->mbmi.mb_skip_coeff; - if (mb_col < cm->mb_cols - 1) - mi[mis + 1].mbmi.mb_skip_coeff = mi->mbmi.mb_skip_coeff; - } - skip[0] = skip[2] = skip[1] = skip[3] = mi->mbmi.mb_skip_coeff; - } else { - for (n = 0; n < 4; n++) { - int x_idx = n & 1, y_idx = n >> 1; - - xd->left_context = cm->left_context + y_idx + (mb_row & 2); - xd->above_context = cm->above_context + mb_col + x_idx; - memcpy(&ta[n], xd->above_context, sizeof(ta[n])); - memcpy(&tl[n], xd->left_context, sizeof(tl[n])); - tp[n] = *t; - xd->mode_info_context = mi + x_idx + y_idx * mis; - - if (!x->skip) { - vp9_subtract_mby_s_c(x->src_diff, - src + x_idx * 16 + y_idx * 16 * src_y_stride, - src_y_stride, - dst + x_idx * 16 + y_idx * 16 * dst_y_stride, - dst_y_stride); - vp9_subtract_mbuv_s_c(x->src_diff, - usrc + x_idx * 8 + y_idx * 8 * src_uv_stride, - vsrc + x_idx * 8 + y_idx * 8 * src_uv_stride, - src_uv_stride, - udst + x_idx * 8 + y_idx * 8 * dst_uv_stride, - vdst + x_idx * 8 + y_idx * 8 * dst_uv_stride, - dst_uv_stride); - vp9_fidct_mb(x); - vp9_recon_mby_s_c(&x->e_mbd, - dst + x_idx * 16 + y_idx * 16 * dst_y_stride); - 
vp9_recon_mbuv_s_c(&x->e_mbd, - udst + x_idx * 8 + y_idx * 8 * dst_uv_stride, - vdst + x_idx * 8 + y_idx * 8 * dst_uv_stride); - - vp9_tokenize_mb(cpi, &x->e_mbd, t, !output_enabled); - skip[n] = xd->mode_info_context->mbmi.mb_skip_coeff; - } else { - int mb_skip_context = cpi->common.mb_no_coeff_skip ? - (x->e_mbd.mode_info_context - 1)->mbmi.mb_skip_coeff + - (x->e_mbd.mode_info_context - mis)->mbmi.mb_skip_coeff : - 0; - xd->mode_info_context->mbmi.mb_skip_coeff = skip[n] = 1; - if (cpi->common.mb_no_coeff_skip) { - // TODO(rbultje) this should be done per-sb instead of per-mb? - if (output_enabled) - cpi->skip_true_count[mb_skip_context]++; - vp9_reset_mb_tokens_context(xd); - } else { - vp9_stuff_mb(cpi, xd, t, !output_enabled); - // TODO(rbultje) this should be done per-sb instead of per-mb? - if (output_enabled) - cpi->skip_false_count[mb_skip_context]++; + if (!x->skip) { + vp9_subtract_sby_s_c(x->src_diff, src, src_y_stride, + dst, dst_y_stride); + vp9_subtract_sbuv_s_c(x->src_diff, + usrc, vsrc, src_uv_stride, + udst, vdst, dst_uv_stride); + switch (mi->mbmi.txfm_size) { + case TX_32X32: + vp9_transform_sby_32x32(x); + vp9_transform_sbuv_16x16(x); + vp9_quantize_sby_32x32(x); + vp9_quantize_sbuv_16x16(x); + if (x->optimize) { + vp9_optimize_sby_32x32(x); + vp9_optimize_sbuv_16x16(x); } - } + vp9_inverse_transform_sby_32x32(xd); + vp9_inverse_transform_sbuv_16x16(xd); + break; + case TX_16X16: + vp9_transform_sby_16x16(x); + vp9_transform_sbuv_16x16(x); + vp9_quantize_sby_16x16(x); + vp9_quantize_sbuv_16x16(x); + if (x->optimize) { + vp9_optimize_sby_16x16(x); + vp9_optimize_sbuv_16x16(x); + } + vp9_inverse_transform_sby_16x16(xd); + vp9_inverse_transform_sbuv_16x16(xd); + break; + case TX_8X8: + vp9_transform_sby_8x8(x); + vp9_transform_sbuv_8x8(x); + vp9_quantize_sby_8x8(x); + vp9_quantize_sbuv_8x8(x); + if (x->optimize) { + vp9_optimize_sby_8x8(x); + vp9_optimize_sbuv_8x8(x); + } + vp9_inverse_transform_sby_8x8(xd); + vp9_inverse_transform_sbuv_8x8(xd); 
+ break; + case TX_4X4: + vp9_transform_sby_4x4(x); + vp9_transform_sbuv_4x4(x); + vp9_quantize_sby_4x4(x); + vp9_quantize_sbuv_4x4(x); + if (x->optimize) { + vp9_optimize_sby_4x4(x); + vp9_optimize_sbuv_4x4(x); + } + vp9_inverse_transform_sby_4x4(xd); + vp9_inverse_transform_sbuv_4x4(xd); + break; + default: assert(0); } + vp9_recon_sby_s_c(xd, dst); + vp9_recon_sbuv_s_c(xd, udst, vdst); - xd->mode_info_context = mi; - update_sb_skip_coeff_state(cpi, ta, tl, tp, t, skip, output_enabled); + vp9_tokenize_sb(cpi, xd, t, !output_enabled); + } else { + // FIXME(rbultje): not tile-aware (mi - 1) + int mb_skip_context = cm->mb_no_coeff_skip ? + (mi - 1)->mbmi.mb_skip_coeff + (mi - mis)->mbmi.mb_skip_coeff : 0; + + mi->mbmi.mb_skip_coeff = 1; + if (cm->mb_no_coeff_skip) { + if (output_enabled) + cpi->skip_true_count[mb_skip_context]++; + vp9_reset_sb_tokens_context(xd); + } else { + vp9_stuff_sb(cpi, xd, t, !output_enabled); + if (output_enabled) + cpi->skip_false_count[mb_skip_context]++; + } + } + + // copy skip flag on all mb_mode_info contexts in this SB + // if this was a skip at this txfm size + if (mb_col < cm->mb_cols - 1) + mi[1].mbmi.mb_skip_coeff = mi->mbmi.mb_skip_coeff; + if (mb_row < cm->mb_rows - 1) { + mi[mis].mbmi.mb_skip_coeff = mi->mbmi.mb_skip_coeff; + if (mb_col < cm->mb_cols - 1) + mi[mis + 1].mbmi.mb_skip_coeff = mi->mbmi.mb_skip_coeff; } if (output_enabled) { if (cm->txfm_mode == TX_MODE_SELECT && - !((cm->mb_no_coeff_skip && skip[0] && skip[1] && skip[2] && skip[3]) || + !((cm->mb_no_coeff_skip && mi->mbmi.mb_skip_coeff) || (vp9_segfeature_active(xd, segment_id, SEG_LVL_SKIP)))) { cpi->txfm_count_32x32p[mi->mbmi.txfm_size]++; } else { - TX_SIZE sz = (cm->txfm_mode == TX_MODE_SELECT) ? - TX_32X32 : - cm->txfm_mode; + TX_SIZE sz = (cm->txfm_mode == TX_MODE_SELECT) ? 
TX_32X32 : cm->txfm_mode; mi->mbmi.txfm_size = sz; if (mb_col < cm->mb_cols - 1) mi[1].mbmi.txfm_size = sz; @@ -2481,11 +2404,8 @@ static void encode_superblock64(VP9_COMP *cpi, TOKENEXTRA **t, int src_uv_stride = x->src.uv_stride, dst_uv_stride = xd->dst.uv_stride; unsigned char ref_pred_flag; int n; - TOKENEXTRA *tp[16]; - int skip[16]; MODE_INFO *mi = x->e_mbd.mode_info_context; unsigned int segment_id = mi->mbmi.segment_id; - ENTROPY_CONTEXT_PLANES ta[16], tl[16]; const int mis = cm->mode_info_stride; if (cm->frame_type == KEY_FRAME) { @@ -2571,149 +2491,99 @@ static void encode_superblock64(VP9_COMP *cpi, TOKENEXTRA **t, mb_row, mb_col); } - if (xd->mode_info_context->mbmi.txfm_size == TX_32X32) { - int n; + if (!x->skip) { + vp9_subtract_sb64y_s_c(x->src_diff, src, src_y_stride, dst, dst_y_stride); + vp9_subtract_sb64uv_s_c(x->src_diff, usrc, vsrc, src_uv_stride, + udst, vdst, dst_uv_stride); - for (n = 0; n < 4; n++) { - int x_idx = n & 1, y_idx = n >> 1; - - xd->mode_info_context = mi + x_idx * 2 + mis * y_idx * 2; - xd->left_context = cm->left_context + (y_idx << 1); - xd->above_context = cm->above_context + mb_col + (x_idx << 1); - memcpy(&ta[n * 2], xd->above_context, sizeof(*ta) * 2); - memcpy(&tl[n * 2], xd->left_context, sizeof(*tl) * 2); - tp[n] = *t; - xd->mode_info_context = mi + x_idx * 2 + y_idx * mis * 2; - if (!x->skip) { - vp9_subtract_sby_s_c(x->sb_coeff_data.src_diff, - src + x_idx * 32 + y_idx * 32 * src_y_stride, - src_y_stride, - dst + x_idx * 32 + y_idx * 32 * dst_y_stride, - dst_y_stride); - vp9_subtract_sbuv_s_c(x->sb_coeff_data.src_diff, - usrc + x_idx * 16 + y_idx * 16 * src_uv_stride, - vsrc + x_idx * 16 + y_idx * 16 * src_uv_stride, - src_uv_stride, - udst + x_idx * 16 + y_idx * 16 * dst_uv_stride, - vdst + x_idx * 16 + y_idx * 16 * dst_uv_stride, - dst_uv_stride); - vp9_transform_sby_32x32(x); - vp9_transform_sbuv_16x16(x); - vp9_quantize_sby_32x32(x); - vp9_quantize_sbuv_16x16(x); - // TODO(rbultje): trellis optimize - 
vp9_inverse_transform_sbuv_16x16(&x->e_mbd.sb_coeff_data); - vp9_inverse_transform_sby_32x32(&x->e_mbd.sb_coeff_data); - vp9_recon_sby_s_c(&x->e_mbd, - dst + 32 * x_idx + 32 * y_idx * dst_y_stride); - vp9_recon_sbuv_s_c(&x->e_mbd, - udst + x_idx * 16 + y_idx * 16 * dst_uv_stride, - vdst + x_idx * 16 + y_idx * 16 * dst_uv_stride); - - vp9_tokenize_sb(cpi, &x->e_mbd, t, !output_enabled); - } else { - int mb_skip_context = cpi->common.mb_no_coeff_skip ? - (mi - 1)->mbmi.mb_skip_coeff + - (mi - mis)->mbmi.mb_skip_coeff : 0; - xd->mode_info_context->mbmi.mb_skip_coeff = 1; - if (cm->mb_no_coeff_skip) { - if (output_enabled) - cpi->skip_true_count[mb_skip_context]++; - vp9_fix_contexts_sb(xd); - } else { - vp9_stuff_sb(cpi, xd, t, !output_enabled); - if (output_enabled) - cpi->skip_false_count[mb_skip_context]++; + switch (xd->mode_info_context->mbmi.txfm_size) { + case TX_32X32: + vp9_transform_sb64y_32x32(x); + vp9_transform_sb64uv_32x32(x); + vp9_quantize_sb64y_32x32(x); + vp9_quantize_sb64uv_32x32(x); + if (x->optimize) { + vp9_optimize_sb64y_32x32(x); + vp9_optimize_sb64uv_32x32(x); } - } - - // copy skip flag on all mb_mode_info contexts in this SB - // if this was a skip at this txfm size - if (mb_col + x_idx * 2 < cm->mb_cols - 1) - mi[mis * y_idx * 2 + x_idx * 2 + 1].mbmi.mb_skip_coeff = - mi[mis * y_idx * 2 + x_idx * 2].mbmi.mb_skip_coeff; - if (mb_row + y_idx * 2 < cm->mb_rows - 1) { - mi[mis * y_idx * 2 + x_idx * 2 + mis].mbmi.mb_skip_coeff = - mi[mis * y_idx * 2 + x_idx * 2].mbmi.mb_skip_coeff; - if (mb_col + x_idx * 2 < cm->mb_cols - 1) - mi[mis * y_idx * 2 + x_idx * 2 + mis + 1].mbmi.mb_skip_coeff = - mi[mis * y_idx * 2 + x_idx * 2].mbmi.mb_skip_coeff; - } - skip[n] = xd->mode_info_context->mbmi.mb_skip_coeff; + vp9_inverse_transform_sb64y_32x32(xd); + vp9_inverse_transform_sb64uv_32x32(xd); + break; + case TX_16X16: + vp9_transform_sb64y_16x16(x); + vp9_transform_sb64uv_16x16(x); + vp9_quantize_sb64y_16x16(x); + vp9_quantize_sb64uv_16x16(x); + if 
(x->optimize) { + vp9_optimize_sb64y_16x16(x); + vp9_optimize_sb64uv_16x16(x); + } + vp9_inverse_transform_sb64y_16x16(xd); + vp9_inverse_transform_sb64uv_16x16(xd); + break; + case TX_8X8: + vp9_transform_sb64y_8x8(x); + vp9_transform_sb64uv_8x8(x); + vp9_quantize_sb64y_8x8(x); + vp9_quantize_sb64uv_8x8(x); + if (x->optimize) { + vp9_optimize_sb64y_8x8(x); + vp9_optimize_sb64uv_8x8(x); + } + vp9_inverse_transform_sb64y_8x8(xd); + vp9_inverse_transform_sb64uv_8x8(xd); + break; + case TX_4X4: + vp9_transform_sb64y_4x4(x); + vp9_transform_sb64uv_4x4(x); + vp9_quantize_sb64y_4x4(x); + vp9_quantize_sb64uv_4x4(x); + if (x->optimize) { + vp9_optimize_sb64y_4x4(x); + vp9_optimize_sb64uv_4x4(x); + } + vp9_inverse_transform_sb64y_4x4(xd); + vp9_inverse_transform_sb64uv_4x4(xd); + break; + default: assert(0); } + vp9_recon_sb64y_s_c(xd, dst); + vp9_recon_sb64uv_s_c(&x->e_mbd, udst, vdst); + + vp9_tokenize_sb64(cpi, &x->e_mbd, t, !output_enabled); } else { - for (n = 0; n < 16; n++) { - const int x_idx = n & 3, y_idx = n >> 2; + // FIXME(rbultje): not tile-aware (mi - 1) + int mb_skip_context = cpi->common.mb_no_coeff_skip ? 
+ (mi - 1)->mbmi.mb_skip_coeff + (mi - mis)->mbmi.mb_skip_coeff : 0; - xd->left_context = cm->left_context + y_idx; - xd->above_context = cm->above_context + mb_col + x_idx; - memcpy(&ta[n], xd->above_context, sizeof(ta[n])); - memcpy(&tl[n], xd->left_context, sizeof(tl[n])); - tp[n] = *t; - xd->mode_info_context = mi + x_idx + y_idx * mis; - - if (!x->skip) { - vp9_subtract_mby_s_c(x->src_diff, - src + x_idx * 16 + y_idx * 16 * src_y_stride, - src_y_stride, - dst + x_idx * 16 + y_idx * 16 * dst_y_stride, - dst_y_stride); - vp9_subtract_mbuv_s_c(x->src_diff, - usrc + x_idx * 8 + y_idx * 8 * src_uv_stride, - vsrc + x_idx * 8 + y_idx * 8 * src_uv_stride, - src_uv_stride, - udst + x_idx * 8 + y_idx * 8 * dst_uv_stride, - vdst + x_idx * 8 + y_idx * 8 * dst_uv_stride, - dst_uv_stride); - vp9_fidct_mb(x); - vp9_recon_mby_s_c(&x->e_mbd, - dst + x_idx * 16 + y_idx * 16 * dst_y_stride); - vp9_recon_mbuv_s_c(&x->e_mbd, - udst + x_idx * 8 + y_idx * 8 * dst_uv_stride, - vdst + x_idx * 8 + y_idx * 8 * dst_uv_stride); - - vp9_tokenize_mb(cpi, &x->e_mbd, t, !output_enabled); - skip[n] = xd->mode_info_context->mbmi.mb_skip_coeff; - } else { - int mb_skip_context = cpi->common.mb_no_coeff_skip ? - (x->e_mbd.mode_info_context - 1)->mbmi.mb_skip_coeff + - (x->e_mbd.mode_info_context - mis)->mbmi.mb_skip_coeff : 0; - xd->mode_info_context->mbmi.mb_skip_coeff = skip[n] = 1; - if (cpi->common.mb_no_coeff_skip) { - // TODO(rbultje) this should be done per-sb instead of per-mb? - if (output_enabled) - cpi->skip_true_count[mb_skip_context]++; - vp9_reset_mb_tokens_context(xd); - } else { - vp9_stuff_mb(cpi, xd, t, !output_enabled); - // TODO(rbultje) this should be done per-sb instead of per-mb? 
- if (output_enabled) - cpi->skip_false_count[mb_skip_context]++; - } - } + xd->mode_info_context->mbmi.mb_skip_coeff = 1; + if (cm->mb_no_coeff_skip) { + if (output_enabled) + cpi->skip_true_count[mb_skip_context]++; + vp9_reset_sb64_tokens_context(xd); + } else { + vp9_stuff_sb64(cpi, xd, t, !output_enabled); + if (output_enabled) + cpi->skip_false_count[mb_skip_context]++; } } - xd->mode_info_context = mi; - update_sb64_skip_coeff_state(cpi, ta, tl, tp, t, skip, output_enabled); + // copy skip flag on all mb_mode_info contexts in this SB + // if this was a skip at this txfm size + for (n = 1; n < 16; n++) { + const int x_idx = n & 3, y_idx = n >> 2; + if (mb_col + x_idx < cm->mb_cols && mb_row + y_idx < cm->mb_rows) + mi[x_idx + y_idx * mis].mbmi.mb_skip_coeff = mi->mbmi.mb_skip_coeff; + } if (output_enabled) { if (cm->txfm_mode == TX_MODE_SELECT && - !((cm->mb_no_coeff_skip && - ((mi->mbmi.txfm_size == TX_32X32 && - skip[0] && skip[1] && skip[2] && skip[3]) || - (mi->mbmi.txfm_size != TX_32X32 && - skip[0] && skip[1] && skip[2] && skip[3] && - skip[4] && skip[5] && skip[6] && skip[7] && - skip[8] && skip[9] && skip[10] && skip[11] && - skip[12] && skip[13] && skip[14] && skip[15]))) || + !((cm->mb_no_coeff_skip && mi->mbmi.mb_skip_coeff) || (vp9_segfeature_active(xd, segment_id, SEG_LVL_SKIP)))) { cpi->txfm_count_32x32p[mi->mbmi.txfm_size]++; } else { int x, y; - TX_SIZE sz = (cm->txfm_mode == TX_MODE_SELECT) ? - TX_32X32 : - cm->txfm_mode; + TX_SIZE sz = (cm->txfm_mode == TX_MODE_SELECT) ? 
TX_32X32 : cm->txfm_mode; for (y = 0; y < 4; y++) { for (x = 0; x < 4; x++) { if (mb_col + x < cm->mb_cols && mb_row + y < cm->mb_rows) { diff --git a/vp9/encoder/vp9_encodeintra.c b/vp9/encoder/vp9_encodeintra.c index be9c224b3..75c8ea8f3 100644 --- a/vp9/encoder/vp9_encodeintra.c +++ b/vp9/encoder/vp9_encodeintra.c @@ -58,7 +58,8 @@ void vp9_encode_intra4x4block(MACROBLOCK *x, int ib) { } else { x->fwd_txm4x4(be->src_diff, be->coeff, 32); x->quantize_b_4x4(x, ib); - vp9_inverse_transform_b_4x4(&x->e_mbd, ib, 32); + vp9_inverse_transform_b_4x4(&x->e_mbd, x->e_mbd.eobs[ib], + b->dqcoeff, b->diff, 32); } vp9_recon_b(b->predictor, b->diff, *(b->base_dst) + b->dst, b->dst_stride); @@ -174,13 +175,16 @@ void vp9_encode_intra8x8(MACROBLOCK *x, int ib) { } else if (!(i & 1) && get_tx_type_4x4(xd, b + 1) == DCT_DCT) { x->fwd_txm8x4(be->src_diff, be->coeff, 32); x->quantize_b_4x4_pair(x, ib + iblock[i], ib + iblock[i] + 1); - vp9_inverse_transform_b_4x4(xd, ib + iblock[i], 32); - vp9_inverse_transform_b_4x4(xd, ib + iblock[i] + 1, 32); + vp9_inverse_transform_b_4x4(xd, xd->eobs[ib + iblock[i]], + b->dqcoeff, b->diff, 32); + vp9_inverse_transform_b_4x4(xd, xd->eobs[ib + iblock[i] + 1], + (b + 1)->dqcoeff, (b + 1)->diff, 32); i++; } else { x->fwd_txm4x4(be->src_diff, be->coeff, 32); x->quantize_b_4x4(x, ib + iblock[i]); - vp9_inverse_transform_b_4x4(xd, ib + iblock[i], 32); + vp9_inverse_transform_b_4x4(xd, xd->eobs[ib + iblock[i]], + b->dqcoeff, b->diff, 32); } } } @@ -210,7 +214,8 @@ static void encode_intra_uv4x4(MACROBLOCK *x, int ib, int mode) { x->fwd_txm4x4(be->src_diff, be->coeff, 16); x->quantize_b_4x4(x, ib); - vp9_inverse_transform_b_4x4(&x->e_mbd, ib, 16); + vp9_inverse_transform_b_4x4(&x->e_mbd, x->e_mbd.eobs[ib], + b->dqcoeff, b->diff, 16); vp9_recon_uv_b_c(b->predictor, b->diff, *(b->base_dst) + b->dst, b->dst_stride); diff --git a/vp9/encoder/vp9_encodemb.c b/vp9/encoder/vp9_encodemb.c index 62f1a2a30..b2ee800cd 100644 --- a/vp9/encoder/vp9_encodemb.c +++ 
b/vp9/encoder/vp9_encodemb.c @@ -146,6 +146,50 @@ void vp9_subtract_sbuv_s_c(int16_t *diff, const uint8_t *usrc, } } +void vp9_subtract_sb64y_s_c(int16_t *diff, const uint8_t *src, int src_stride, + const uint8_t *pred, int dst_stride) { + int r, c; + + for (r = 0; r < 64; r++) { + for (c = 0; c < 64; c++) { + diff[c] = src[c] - pred[c]; + } + + diff += 64; + pred += dst_stride; + src += src_stride; + } +} + +void vp9_subtract_sb64uv_s_c(int16_t *diff, const uint8_t *usrc, + const uint8_t *vsrc, int src_stride, + const uint8_t *upred, + const uint8_t *vpred, int dst_stride) { + int16_t *udiff = diff + 4096; + int16_t *vdiff = diff + 4096 + 1024; + int r, c; + + for (r = 0; r < 32; r++) { + for (c = 0; c < 32; c++) { + udiff[c] = usrc[c] - upred[c]; + } + + udiff += 32; + upred += dst_stride; + usrc += src_stride; + } + + for (r = 0; r < 32; r++) { + for (c = 0; c < 32; c++) { + vdiff[c] = vsrc[c] - vpred[c]; + } + + vdiff += 32; + vpred += dst_stride; + vsrc += src_stride; + } +} + void vp9_subtract_mby_c(int16_t *diff, uint8_t *src, uint8_t *pred, int stride) { vp9_subtract_mby_s_c(diff, src, stride, pred, 16); @@ -245,15 +289,168 @@ void vp9_transform_mb_16x16(MACROBLOCK *x) { } void vp9_transform_sby_32x32(MACROBLOCK *x) { - SUPERBLOCK * const x_sb = &x->sb_coeff_data; - vp9_short_fdct32x32(x_sb->src_diff, x_sb->coeff, 64); + vp9_short_fdct32x32(x->src_diff, x->coeff, 64); +} + +void vp9_transform_sby_16x16(MACROBLOCK *x) { + int n; + + for (n = 0; n < 4; n++) { + const int x_idx = n & 1, y_idx = n >> 1; + + x->fwd_txm16x16(x->src_diff + y_idx * 32 * 16 + x_idx * 16, + x->coeff + n * 256, 64); + } +} + +void vp9_transform_sby_8x8(MACROBLOCK *x) { + int n; + + for (n = 0; n < 16; n++) { + const int x_idx = n & 3, y_idx = n >> 2; + + x->fwd_txm8x8(x->src_diff + y_idx * 32 * 8 + x_idx * 8, + x->coeff + n * 64, 64); + } +} + +void vp9_transform_sby_4x4(MACROBLOCK *x) { + int n; + + for (n = 0; n < 64; n++) { + const int x_idx = n & 7, y_idx = n >> 3; + + 
x->fwd_txm4x4(x->src_diff + y_idx * 32 * 4 + x_idx * 4, + x->coeff + n * 16, 64); + } } void vp9_transform_sbuv_16x16(MACROBLOCK *x) { - SUPERBLOCK * const x_sb = &x->sb_coeff_data; vp9_clear_system_state(); - x->fwd_txm16x16(x_sb->src_diff + 1024, x_sb->coeff + 1024, 32); - x->fwd_txm16x16(x_sb->src_diff + 1280, x_sb->coeff + 1280, 32); + x->fwd_txm16x16(x->src_diff + 1024, x->coeff + 1024, 32); + x->fwd_txm16x16(x->src_diff + 1280, x->coeff + 1280, 32); +} + +void vp9_transform_sbuv_8x8(MACROBLOCK *x) { + int n; + + vp9_clear_system_state(); + for (n = 0; n < 4; n++) { + const int x_idx = n & 1, y_idx = n >> 1; + + x->fwd_txm8x8(x->src_diff + 1024 + y_idx * 16 * 8 + x_idx * 8, + x->coeff + 1024 + n * 64, 32); + x->fwd_txm8x8(x->src_diff + 1280 + y_idx * 16 * 8 + x_idx * 8, + x->coeff + 1280 + n * 64, 32); + } +} + +void vp9_transform_sbuv_4x4(MACROBLOCK *x) { + int n; + + vp9_clear_system_state(); + for (n = 0; n < 16; n++) { + const int x_idx = n & 3, y_idx = n >> 2; + + x->fwd_txm4x4(x->src_diff + 1024 + y_idx * 16 * 4 + x_idx * 4, + x->coeff + 1024 + n * 16, 32); + x->fwd_txm4x4(x->src_diff + 1280 + y_idx * 16 * 4 + x_idx * 4, + x->coeff + 1280 + n * 16, 32); + } +} + +void vp9_transform_sb64y_32x32(MACROBLOCK *x) { + int n; + + for (n = 0; n < 4; n++) { + const int x_idx = n & 1, y_idx = n >> 1; + + vp9_short_fdct32x32(x->src_diff + y_idx * 64 * 32 + x_idx * 32, + x->coeff + n * 1024, 128); + } +} + +void vp9_transform_sb64y_16x16(MACROBLOCK *x) { + int n; + + for (n = 0; n < 16; n++) { + const int x_idx = n & 3, y_idx = n >> 2; + + x->fwd_txm16x16(x->src_diff + y_idx * 64 * 16 + x_idx * 16, + x->coeff + n * 256, 128); + } +} + +void vp9_transform_sb64y_8x8(MACROBLOCK *x) { + int n; + + for (n = 0; n < 64; n++) { + const int x_idx = n & 7, y_idx = n >> 3; + + x->fwd_txm8x8(x->src_diff + y_idx * 64 * 8 + x_idx * 8, + x->coeff + n * 64, 128); + } +} + +void vp9_transform_sb64y_4x4(MACROBLOCK *x) { + int n; + + for (n = 0; n < 256; n++) { + const int x_idx = n & 
15, y_idx = n >> 4; + + x->fwd_txm4x4(x->src_diff + y_idx * 64 * 4 + x_idx * 4, + x->coeff + n * 16, 128); + } +} + +void vp9_transform_sb64uv_32x32(MACROBLOCK *x) { + vp9_clear_system_state(); + vp9_short_fdct32x32(x->src_diff + 4096, + x->coeff + 4096, 64); + vp9_short_fdct32x32(x->src_diff + 4096 + 1024, + x->coeff + 4096 + 1024, 64); +} + +void vp9_transform_sb64uv_16x16(MACROBLOCK *x) { + int n; + + vp9_clear_system_state(); + for (n = 0; n < 4; n++) { + const int x_idx = n & 1, y_idx = n >> 1; + + x->fwd_txm16x16(x->src_diff + 4096 + y_idx * 32 * 16 + x_idx * 16, + x->coeff + 4096 + n * 256, 64); + x->fwd_txm16x16(x->src_diff + 4096 + 1024 + y_idx * 32 * 16 + x_idx * 16, + x->coeff + 4096 + 1024 + n * 256, 64); + } +} + +void vp9_transform_sb64uv_8x8(MACROBLOCK *x) { + int n; + + vp9_clear_system_state(); + for (n = 0; n < 16; n++) { + const int x_idx = n & 3, y_idx = n >> 2; + + x->fwd_txm8x8(x->src_diff + 4096 + y_idx * 32 * 8 + x_idx * 8, + x->coeff + 4096 + n * 64, 64); + x->fwd_txm8x8(x->src_diff + 4096 + 1024 + y_idx * 32 * 8 + x_idx * 8, + x->coeff + 4096 + 1024 + n * 64, 64); + } +} + +void vp9_transform_sb64uv_4x4(MACROBLOCK *x) { + int n; + + vp9_clear_system_state(); + for (n = 0; n < 64; n++) { + const int x_idx = n & 7, y_idx = n >> 3; + + x->fwd_txm4x4(x->src_diff + 4096 + y_idx * 32 * 4 + x_idx * 4, + x->coeff + 4096 + n * 16, 64); + x->fwd_txm4x4(x->src_diff + 4096 + 1024 + y_idx * 32 * 4 + x_idx * 4, + x->coeff + 4096 + 1024 + n * 16, 64); + } } #define RDTRUNC(RM,DM,R,D) ( (128+(R)*(RM)) & 0xFF ) @@ -294,55 +491,35 @@ static int trellis_get_coeff_context(int token) { return vp9_get_coef_context(&recent_energy, token); } -static void optimize_b(MACROBLOCK *mb, int i, PLANE_TYPE type, +static void optimize_b(MACROBLOCK *mb, int ib, PLANE_TYPE type, + const int16_t *dequant_ptr, ENTROPY_CONTEXT *a, ENTROPY_CONTEXT *l, int tx_size) { const int ref = mb->e_mbd.mode_info_context->mbmi.ref_frame != INTRA_FRAME; MACROBLOCKD *const xd = &mb->e_mbd; - 
BLOCK *b = &mb->block[i]; - BLOCKD *d = &xd->block[i]; - vp9_token_state tokens[257][2]; - unsigned best_index[257][2]; - const int16_t *dequant_ptr = d->dequant, *coeff_ptr = b->coeff; - int16_t *qcoeff_ptr = d->qcoeff; - int16_t *dqcoeff_ptr = d->dqcoeff; - int eob = xd->eobs[i], final_eob, sz = 0; + vp9_token_state tokens[1025][2]; + unsigned best_index[1025][2]; + const int16_t *coeff_ptr = mb->coeff + ib * 16; + int16_t *qcoeff_ptr = xd->qcoeff + ib * 16; + int16_t *dqcoeff_ptr = xd->dqcoeff + ib * 16; + int eob = xd->eobs[ib], final_eob, sz = 0; const int i0 = 0; - int rc, x, next; + int rc, x, next, i; int64_t rdmult, rddiv, rd_cost0, rd_cost1; int rate0, rate1, error0, error1, t0, t1; int best, band, pt; int err_mult = plane_rd_mult[type]; int default_eob; int const *scan; + const int mul = 1 + (tx_size == TX_32X32); switch (tx_size) { default: case TX_4X4: - scan = vp9_default_zig_zag1d_4x4; default_eob = 16; - // TODO: this isn't called (for intra4x4 modes), but will be left in - // since it could be used later - { - TX_TYPE tx_type = get_tx_type_4x4(&mb->e_mbd, d); - if (tx_type != DCT_DCT) { - switch (tx_type) { - case ADST_DCT: - scan = vp9_row_scan_4x4; - break; - - case DCT_ADST: - scan = vp9_col_scan_4x4; - break; - - default: - scan = vp9_default_zig_zag1d_4x4; - break; - } - } else { - scan = vp9_default_zig_zag1d_4x4; - } - } + // FIXME(rbultje): although optimize_b currently isn't called for + // intra4x4, this should be changed to be adst-compatible + scan = vp9_default_zig_zag1d_4x4; break; case TX_8X8: scan = vp9_default_zig_zag1d_8x8; @@ -352,6 +529,10 @@ static void optimize_b(MACROBLOCK *mb, int i, PLANE_TYPE type, scan = vp9_default_zig_zag1d_16x16; default_eob = 256; break; + case TX_32X32: + scan = vp9_default_zig_zag1d_32x32; + default_eob = 1024; + break; } /* Now set up a Viterbi trellis to evaluate alternative roundings. */ @@ -395,7 +576,7 @@ static void optimize_b(MACROBLOCK *mb, int i, PLANE_TYPE type, /* And pick the best. 
*/ best = rd_cost1 < rd_cost0; base_bits = *(vp9_dct_value_cost_ptr + x); - dx = dqcoeff_ptr[rc] - coeff_ptr[rc]; + dx = mul * (dqcoeff_ptr[rc] - coeff_ptr[rc]); d2 = dx * dx; tokens[i][0].rate = base_bits + (best ? rate1 : rate0); tokens[i][0].error = d2 + (best ? error1 : error0); @@ -407,8 +588,9 @@ static void optimize_b(MACROBLOCK *mb, int i, PLANE_TYPE type, rate0 = tokens[next][0].rate; rate1 = tokens[next][1].rate; - if ((abs(x)*dequant_ptr[rc != 0] > abs(coeff_ptr[rc])) && - (abs(x)*dequant_ptr[rc != 0] < abs(coeff_ptr[rc]) + dequant_ptr[rc != 0])) + if ((abs(x)*dequant_ptr[rc != 0] > abs(coeff_ptr[rc]) * mul) && + (abs(x)*dequant_ptr[rc != 0] < abs(coeff_ptr[rc]) * mul + + dequant_ptr[rc != 0])) shortcut = 1; else shortcut = 0; @@ -504,14 +686,14 @@ static void optimize_b(MACROBLOCK *mb, int i, PLANE_TYPE type, final_eob = i; rc = scan[i]; qcoeff_ptr[rc] = x; - dqcoeff_ptr[rc] = (x * dequant_ptr[rc != 0]); + dqcoeff_ptr[rc] = (x * dequant_ptr[rc != 0]) / mul; next = tokens[i][best].next; best = best_index[i][best]; } final_eob++; - xd->eobs[d - xd->block] = final_eob; + xd->eobs[ib] = final_eob; *a = *l = (final_eob > 0); } @@ -531,7 +713,7 @@ void vp9_optimize_mby_4x4(MACROBLOCK *x) { tl = (ENTROPY_CONTEXT *)&t_left; for (b = 0; b < 16; b++) { - optimize_b(x, b, PLANE_TYPE_Y_WITH_DC, + optimize_b(x, b, PLANE_TYPE_Y_WITH_DC, x->e_mbd.block[b].dequant, ta + vp9_block2above[TX_4X4][b], tl + vp9_block2left[TX_4X4][b], TX_4X4); } @@ -553,7 +735,7 @@ void vp9_optimize_mbuv_4x4(MACROBLOCK *x) { tl = (ENTROPY_CONTEXT *)&t_left; for (b = 16; b < 24; b++) { - optimize_b(x, b, PLANE_TYPE_UV, + optimize_b(x, b, PLANE_TYPE_UV, x->e_mbd.block[b].dequant, ta + vp9_block2above[TX_4X4][b], tl + vp9_block2left[TX_4X4][b], TX_4X4); } @@ -583,7 +765,8 @@ void vp9_optimize_mby_8x8(MACROBLOCK *x) { ENTROPY_CONTEXT *const l = tl + vp9_block2left[TX_8X8][b]; ENTROPY_CONTEXT above_ec = (a[0] + a[1]) != 0; ENTROPY_CONTEXT left_ec = (l[0] + l[1]) != 0; - optimize_b(x, b, 
PLANE_TYPE_Y_WITH_DC, &above_ec, &left_ec, TX_8X8); + optimize_b(x, b, PLANE_TYPE_Y_WITH_DC, x->e_mbd.block[b].dequant, + &above_ec, &left_ec, TX_8X8); a[1] = a[0] = above_ec; l[1] = l[0] = left_ec; } @@ -602,7 +785,8 @@ void vp9_optimize_mbuv_8x8(MACROBLOCK *x) { ENTROPY_CONTEXT *const l = tl + vp9_block2left[TX_8X8][b]; ENTROPY_CONTEXT above_ec = (a[0] + a[1]) != 0; ENTROPY_CONTEXT left_ec = (l[0] + l[1]) != 0; - optimize_b(x, b, PLANE_TYPE_UV, &above_ec, &left_ec, TX_8X8); + optimize_b(x, b, PLANE_TYPE_UV, x->e_mbd.block[b].dequant, + &above_ec, &left_ec, TX_8X8); } } @@ -621,7 +805,8 @@ void vp9_optimize_mby_16x16(MACROBLOCK *x) { ta = (t_above->y1[0] + t_above->y1[1] + t_above->y1[2] + t_above->y1[3]) != 0; tl = (t_left->y1[0] + t_left->y1[1] + t_left->y1[2] + t_left->y1[3]) != 0; - optimize_b(x, 0, PLANE_TYPE_Y_WITH_DC, &ta, &tl, TX_16X16); + optimize_b(x, 0, PLANE_TYPE_Y_WITH_DC, x->e_mbd.block[0].dequant, + &ta, &tl, TX_16X16); } static void optimize_mb_16x16(MACROBLOCK *x) { @@ -629,6 +814,333 @@ static void optimize_mb_16x16(MACROBLOCK *x) { vp9_optimize_mbuv_8x8(x); } +void vp9_optimize_sby_32x32(MACROBLOCK *x) { + ENTROPY_CONTEXT *a = (ENTROPY_CONTEXT *) x->e_mbd.above_context; + ENTROPY_CONTEXT *a1 = (ENTROPY_CONTEXT *) (x->e_mbd.above_context + 1); + ENTROPY_CONTEXT *l = (ENTROPY_CONTEXT *) x->e_mbd.left_context; + ENTROPY_CONTEXT *l1 = (ENTROPY_CONTEXT *) (x->e_mbd.left_context + 1); + ENTROPY_CONTEXT ta, tl; + + ta = (a[0] + a[1] + a[2] + a[3] + a1[0] + a1[1] + a1[2] + a1[3]) != 0; + tl = (l[0] + l[1] + l[2] + l[3] + l1[0] + l1[1] + l1[2] + l1[3]) != 0; + optimize_b(x, 0, PLANE_TYPE_Y_WITH_DC, x->e_mbd.block[0].dequant, + &ta, &tl, TX_32X32); +} + +void vp9_optimize_sby_16x16(MACROBLOCK *x) { + ENTROPY_CONTEXT *a = (ENTROPY_CONTEXT *) x->e_mbd.above_context; + ENTROPY_CONTEXT *a1 = (ENTROPY_CONTEXT *) (x->e_mbd.above_context + 1); + ENTROPY_CONTEXT *l = (ENTROPY_CONTEXT *) x->e_mbd.left_context; + ENTROPY_CONTEXT *l1 = (ENTROPY_CONTEXT *) 
(x->e_mbd.left_context + 1); + ENTROPY_CONTEXT ta[2], tl[2]; + int n; + + ta[0] = (a[0] + a[1] + a[2] + a[3]) != 0; + ta[1] = (a1[0] + a1[1] + a1[2] + a1[3]) != 0; + tl[0] = (l[0] + l[1] + l[2] + l[3]) != 0; + tl[1] = (l1[0] + l1[1] + l1[2] + l1[3]) != 0; + for (n = 0; n < 4; n++) { + const int x_idx = n & 1, y_idx = n >> 1; + + optimize_b(x, n * 16, PLANE_TYPE_Y_WITH_DC, x->e_mbd.block[0].dequant, + ta + x_idx, tl + y_idx, TX_16X16); + } +} + +void vp9_optimize_sby_8x8(MACROBLOCK *x) { + ENTROPY_CONTEXT *a = (ENTROPY_CONTEXT *) x->e_mbd.above_context; + ENTROPY_CONTEXT *a1 = (ENTROPY_CONTEXT *) (x->e_mbd.above_context + 1); + ENTROPY_CONTEXT *l = (ENTROPY_CONTEXT *) x->e_mbd.left_context; + ENTROPY_CONTEXT *l1 = (ENTROPY_CONTEXT *) (x->e_mbd.left_context + 1); + ENTROPY_CONTEXT ta[4], tl[4]; + int n; + + ta[0] = (a[0] + a[1]) != 0; + ta[1] = (a[2] + a[3]) != 0; + ta[2] = (a1[0] + a1[1]) != 0; + ta[3] = (a1[2] + a1[3]) != 0; + tl[0] = (l[0] + l[1]) != 0; + tl[1] = (l[2] + l[3]) != 0; + tl[2] = (l1[0] + l1[1]) != 0; + tl[3] = (l1[2] + l1[3]) != 0; + for (n = 0; n < 16; n++) { + const int x_idx = n & 3, y_idx = n >> 2; + + optimize_b(x, n * 4, PLANE_TYPE_Y_WITH_DC, x->e_mbd.block[0].dequant, + ta + x_idx, tl + y_idx, TX_8X8); + } +} + +void vp9_optimize_sby_4x4(MACROBLOCK *x) { + ENTROPY_CONTEXT ta[8], tl[8]; + int n; + + vpx_memcpy(ta, x->e_mbd.above_context, 4 * sizeof(ENTROPY_CONTEXT)); + vpx_memcpy(ta + 4, x->e_mbd.above_context + 1, 4 * sizeof(ENTROPY_CONTEXT)); + vpx_memcpy(tl, x->e_mbd.left_context, 4 * sizeof(ENTROPY_CONTEXT)); + vpx_memcpy(tl + 4, x->e_mbd.left_context + 1, 4 * sizeof(ENTROPY_CONTEXT)); + for (n = 0; n < 64; n++) { + const int x_idx = n & 7, y_idx = n >> 3; + + optimize_b(x, n, PLANE_TYPE_Y_WITH_DC, x->e_mbd.block[0].dequant, + ta + x_idx, tl + y_idx, TX_4X4); + } +} + +void vp9_optimize_sbuv_16x16(MACROBLOCK *x) { + ENTROPY_CONTEXT *ta = (ENTROPY_CONTEXT *) x->e_mbd.above_context; + ENTROPY_CONTEXT *tl = (ENTROPY_CONTEXT *) 
x->e_mbd.left_context; + ENTROPY_CONTEXT *a, *l, *a1, *l1, above_ec, left_ec; + int b; + + for (b = 64; b < 96; b += 16) { + const int cidx = b >= 80 ? 20 : 16; + a = ta + vp9_block2above_sb[TX_16X16][b]; + l = tl + vp9_block2left_sb[TX_16X16][b]; + a1 = a + sizeof(ENTROPY_CONTEXT_PLANES) / sizeof(ENTROPY_CONTEXT); + l1 = l + sizeof(ENTROPY_CONTEXT_PLANES) / sizeof(ENTROPY_CONTEXT); + above_ec = (a[0] + a[1] + a1[0] + a1[1]) != 0; + left_ec = (l[0] + l[1] + l1[0] + l1[1]) != 0; + optimize_b(x, b, PLANE_TYPE_UV, x->e_mbd.block[cidx].dequant, + &above_ec, &left_ec, TX_16X16); + } +} + +void vp9_optimize_sbuv_8x8(MACROBLOCK *x) { + ENTROPY_CONTEXT_PLANES t_above[2], t_left[2]; + ENTROPY_CONTEXT *ta = (ENTROPY_CONTEXT *) t_above; + ENTROPY_CONTEXT *tl = (ENTROPY_CONTEXT *) t_left; + ENTROPY_CONTEXT *a, *l, above_ec, left_ec; + int b; + + vpx_memcpy(t_above, x->e_mbd.above_context, sizeof(t_above)); + vpx_memcpy(t_left, x->e_mbd.left_context, sizeof(t_left)); + for (b = 64; b < 96; b += 4) { + const int cidx = b >= 80 ? 20 : 16; + a = ta + vp9_block2above_sb[TX_8X8][b]; + l = tl + vp9_block2left_sb[TX_8X8][b]; + above_ec = (a[0] + a[1]) != 0; + left_ec = (l[0] + l[1]) != 0; + optimize_b(x, b, PLANE_TYPE_UV, x->e_mbd.block[cidx].dequant, + &above_ec, &left_ec, TX_8X8); + a[0] = a[1] = above_ec; + l[0] = l[1] = left_ec; + } +} + +void vp9_optimize_sbuv_4x4(MACROBLOCK *x) { + ENTROPY_CONTEXT_PLANES t_above[2], t_left[2]; + ENTROPY_CONTEXT *ta = (ENTROPY_CONTEXT *) t_above; + ENTROPY_CONTEXT *tl = (ENTROPY_CONTEXT *) t_left; + ENTROPY_CONTEXT *a, *l; + int b; + + vpx_memcpy(t_above, x->e_mbd.above_context, sizeof(t_above)); + vpx_memcpy(t_left, x->e_mbd.left_context, sizeof(t_left)); + for (b = 64; b < 96; b++) { + const int cidx = b >= 80 ? 
20 : 16; + a = ta + vp9_block2above_sb[TX_4X4][b]; + l = tl + vp9_block2left_sb[TX_4X4][b]; + optimize_b(x, b, PLANE_TYPE_UV, x->e_mbd.block[cidx].dequant, + a, l, TX_4X4); + } +} + +void vp9_optimize_sb64y_32x32(MACROBLOCK *x) { + ENTROPY_CONTEXT *a = (ENTROPY_CONTEXT *) x->e_mbd.above_context; + ENTROPY_CONTEXT *a1 = (ENTROPY_CONTEXT *) (x->e_mbd.above_context + 1); + ENTROPY_CONTEXT *a2 = (ENTROPY_CONTEXT *) (x->e_mbd.above_context + 2); + ENTROPY_CONTEXT *a3 = (ENTROPY_CONTEXT *) (x->e_mbd.above_context + 3); + ENTROPY_CONTEXT *l = (ENTROPY_CONTEXT *) x->e_mbd.left_context; + ENTROPY_CONTEXT *l1 = (ENTROPY_CONTEXT *) (x->e_mbd.left_context + 1); + ENTROPY_CONTEXT *l2 = (ENTROPY_CONTEXT *) (x->e_mbd.left_context + 2); + ENTROPY_CONTEXT *l3 = (ENTROPY_CONTEXT *) (x->e_mbd.left_context + 3); + ENTROPY_CONTEXT ta[2], tl[2]; + int n; + + ta[0] = (a[0] + a[1] + a[2] + a[3] + a1[0] + a1[1] + a1[2] + a1[3]) != 0; + ta[1] = (a2[0] + a2[1] + a2[2] + a2[3] + a3[0] + a3[1] + a3[2] + a3[3]) != 0; + tl[0] = (l[0] + l[1] + l[2] + l[3] + l1[0] + l1[1] + l1[2] + l1[3]) != 0; + tl[1] = (l2[0] + l2[1] + l2[2] + l2[3] + l3[0] + l3[1] + l3[2] + l3[3]) != 0; + for (n = 0; n < 4; n++) { + const int x_idx = n & 1, y_idx = n >> 1; + + optimize_b(x, n * 64, PLANE_TYPE_Y_WITH_DC, x->e_mbd.block[0].dequant, + ta + x_idx, tl + y_idx, TX_32X32); + } +} + +void vp9_optimize_sb64y_16x16(MACROBLOCK *x) { + ENTROPY_CONTEXT *a = (ENTROPY_CONTEXT *) x->e_mbd.above_context; + ENTROPY_CONTEXT *a1 = (ENTROPY_CONTEXT *) (x->e_mbd.above_context + 1); + ENTROPY_CONTEXT *a2 = (ENTROPY_CONTEXT *) (x->e_mbd.above_context + 2); + ENTROPY_CONTEXT *a3 = (ENTROPY_CONTEXT *) (x->e_mbd.above_context + 3); + ENTROPY_CONTEXT *l = (ENTROPY_CONTEXT *) x->e_mbd.left_context; + ENTROPY_CONTEXT *l1 = (ENTROPY_CONTEXT *) (x->e_mbd.left_context + 1); + ENTROPY_CONTEXT *l2 = (ENTROPY_CONTEXT *) (x->e_mbd.left_context + 2); + ENTROPY_CONTEXT *l3 = (ENTROPY_CONTEXT *) (x->e_mbd.left_context + 3); + ENTROPY_CONTEXT ta[4], 
tl[4]; + int n; + + ta[0] = (a[0] + a[1] + a[2] + a[3]) != 0; + ta[1] = (a1[0] + a1[1] + a1[2] + a1[3]) != 0; + ta[2] = (a2[0] + a2[1] + a2[2] + a2[3]) != 0; + ta[3] = (a3[0] + a3[1] + a3[2] + a3[3]) != 0; + tl[0] = (l[0] + l[1] + l[2] + l[3]) != 0; + tl[1] = (l1[0] + l1[1] + l1[2] + l1[3]) != 0; + tl[2] = (l2[0] + l2[1] + l2[2] + l2[3]) != 0; + tl[3] = (l3[0] + l3[1] + l3[2] + l3[3]) != 0; + for (n = 0; n < 16; n++) { + const int x_idx = n & 3, y_idx = n >> 2; + + optimize_b(x, n * 16, PLANE_TYPE_Y_WITH_DC, x->e_mbd.block[0].dequant, + ta + x_idx, tl + y_idx, TX_16X16); + } +} + +void vp9_optimize_sb64y_8x8(MACROBLOCK *x) { + ENTROPY_CONTEXT *a = (ENTROPY_CONTEXT *) x->e_mbd.above_context; + ENTROPY_CONTEXT *a1 = (ENTROPY_CONTEXT *) (x->e_mbd.above_context + 1); + ENTROPY_CONTEXT *a2 = (ENTROPY_CONTEXT *) (x->e_mbd.above_context + 2); + ENTROPY_CONTEXT *a3 = (ENTROPY_CONTEXT *) (x->e_mbd.above_context + 3); + ENTROPY_CONTEXT *l = (ENTROPY_CONTEXT *) x->e_mbd.left_context; + ENTROPY_CONTEXT *l1 = (ENTROPY_CONTEXT *) (x->e_mbd.left_context + 1); + ENTROPY_CONTEXT *l2 = (ENTROPY_CONTEXT *) (x->e_mbd.left_context + 2); + ENTROPY_CONTEXT *l3 = (ENTROPY_CONTEXT *) (x->e_mbd.left_context + 3); + ENTROPY_CONTEXT ta[8], tl[8]; + int n; + + ta[0] = (a[0] + a[1]) != 0; + ta[1] = (a[2] + a[3]) != 0; + ta[2] = (a1[0] + a1[1]) != 0; + ta[3] = (a1[2] + a1[3]) != 0; + ta[4] = (a2[0] + a2[1]) != 0; + ta[5] = (a2[2] + a2[3]) != 0; + ta[6] = (a3[0] + a3[1]) != 0; + ta[7] = (a3[2] + a3[3]) != 0; + tl[0] = (l[0] + l[1]) != 0; + tl[1] = (l[2] + l[3]) != 0; + tl[2] = (l1[0] + l1[1]) != 0; + tl[3] = (l1[2] + l1[3]) != 0; + tl[4] = (l2[0] + l2[1]) != 0; + tl[5] = (l2[2] + l2[3]) != 0; + tl[6] = (l3[0] + l3[1]) != 0; + tl[7] = (l3[2] + l3[3]) != 0; + for (n = 0; n < 64; n++) { + const int x_idx = n & 7, y_idx = n >> 3; + + optimize_b(x, n * 4, PLANE_TYPE_Y_WITH_DC, x->e_mbd.block[0].dequant, + ta + x_idx, tl + y_idx, TX_8X8); + } +} + +void vp9_optimize_sb64y_4x4(MACROBLOCK *x) { + 
ENTROPY_CONTEXT ta[16], tl[16]; + int n; + + vpx_memcpy(ta, x->e_mbd.above_context, 4 * sizeof(ENTROPY_CONTEXT)); + vpx_memcpy(ta + 4, x->e_mbd.above_context + 1, 4 * sizeof(ENTROPY_CONTEXT)); + vpx_memcpy(ta + 8, x->e_mbd.above_context + 2, 4 * sizeof(ENTROPY_CONTEXT)); + vpx_memcpy(ta + 12, x->e_mbd.above_context + 3, 4 * sizeof(ENTROPY_CONTEXT)); + vpx_memcpy(tl, x->e_mbd.left_context, 4 * sizeof(ENTROPY_CONTEXT)); + vpx_memcpy(tl + 4, x->e_mbd.left_context + 1, 4 * sizeof(ENTROPY_CONTEXT)); + vpx_memcpy(tl + 8, x->e_mbd.left_context + 2, 4 * sizeof(ENTROPY_CONTEXT)); + vpx_memcpy(tl + 12, x->e_mbd.left_context + 3, 4 * sizeof(ENTROPY_CONTEXT)); + for (n = 0; n < 256; n++) { + const int x_idx = n & 15, y_idx = n >> 4; + + optimize_b(x, n, PLANE_TYPE_Y_WITH_DC, x->e_mbd.block[0].dequant, + ta + x_idx, tl + y_idx, TX_4X4); + } +} + +void vp9_optimize_sb64uv_32x32(MACROBLOCK *x) { + ENTROPY_CONTEXT *ta = (ENTROPY_CONTEXT *) x->e_mbd.above_context; + ENTROPY_CONTEXT *tl = (ENTROPY_CONTEXT *) x->e_mbd.left_context; + ENTROPY_CONTEXT *a, *l, *a1, *l1, *a2, *l2, *a3, *l3, a_ec, l_ec; + int b; + + for (b = 256; b < 384; b += 64) { + const int cidx = b >= 320 ? 
20 : 16; + a = ta + vp9_block2above_sb64[TX_32X32][b]; + l = tl + vp9_block2left_sb64[TX_32X32][b]; + a1 = a + sizeof(ENTROPY_CONTEXT_PLANES) / sizeof(ENTROPY_CONTEXT); + l1 = l + sizeof(ENTROPY_CONTEXT_PLANES) / sizeof(ENTROPY_CONTEXT); + a2 = a + 2 * sizeof(ENTROPY_CONTEXT_PLANES) / sizeof(ENTROPY_CONTEXT); + l2 = l + 2 * sizeof(ENTROPY_CONTEXT_PLANES) / sizeof(ENTROPY_CONTEXT); + a3 = a + 3 * sizeof(ENTROPY_CONTEXT_PLANES) / sizeof(ENTROPY_CONTEXT); + l3 = l + 3 * sizeof(ENTROPY_CONTEXT_PLANES) / sizeof(ENTROPY_CONTEXT); + a_ec = (a[0] + a[1] + a1[0] + a1[1] + a2[0] + a2[1] + a3[0] + a3[1]) != 0; + l_ec = (l[0] + l[1] + l1[0] + l1[1] + l2[0] + l2[1] + l3[0] + l3[1]) != 0; + optimize_b(x, b, PLANE_TYPE_UV, x->e_mbd.block[cidx].dequant, + &a_ec, &l_ec, TX_32X32); + } +} + +void vp9_optimize_sb64uv_16x16(MACROBLOCK *x) { + ENTROPY_CONTEXT_PLANES t_above[4], t_left[4]; + ENTROPY_CONTEXT *ta = (ENTROPY_CONTEXT *) t_above; + ENTROPY_CONTEXT *tl = (ENTROPY_CONTEXT *) t_left; + ENTROPY_CONTEXT *a, *l, *a1, *l1, above_ec, left_ec; + int b; + + vpx_memcpy(t_above, x->e_mbd.above_context, sizeof(t_above)); + vpx_memcpy(t_left, x->e_mbd.left_context, sizeof(t_left)); + for (b = 256; b < 384; b += 16) { + const int cidx = b >= 320 ? 
20 : 16; + a = ta + vp9_block2above_sb64[TX_16X16][b]; + l = tl + vp9_block2left_sb64[TX_16X16][b]; + a1 = a + sizeof(ENTROPY_CONTEXT_PLANES) / sizeof(ENTROPY_CONTEXT); + l1 = l + sizeof(ENTROPY_CONTEXT_PLANES) / sizeof(ENTROPY_CONTEXT); + above_ec = (a[0] + a[1] + a1[0] + a1[1]) != 0; + left_ec = (l[0] + l[1] + l1[0] + l1[1]) != 0; + optimize_b(x, b, PLANE_TYPE_UV, x->e_mbd.block[cidx].dequant, + &above_ec, &left_ec, TX_16X16); + a[0] = a[1] = a1[0] = a1[1] = above_ec; + l[0] = l[1] = l1[0] = l1[1] = left_ec; + } +} + +void vp9_optimize_sb64uv_8x8(MACROBLOCK *x) { + ENTROPY_CONTEXT_PLANES t_above[4], t_left[4]; + ENTROPY_CONTEXT *ta = (ENTROPY_CONTEXT *) t_above; + ENTROPY_CONTEXT *tl = (ENTROPY_CONTEXT *) t_left; + ENTROPY_CONTEXT *a, *l, above_ec, left_ec; + int b; + + vpx_memcpy(t_above, x->e_mbd.above_context, sizeof(t_above)); + vpx_memcpy(t_left, x->e_mbd.left_context, sizeof(t_left)); + for (b = 256; b < 384; b += 4) { + const int cidx = b >= 320 ? 20 : 16; + a = ta + vp9_block2above_sb64[TX_8X8][b]; + l = tl + vp9_block2left_sb64[TX_8X8][b]; + above_ec = (a[0] + a[1]) != 0; + left_ec = (l[0] + l[1]) != 0; + optimize_b(x, b, PLANE_TYPE_UV, x->e_mbd.block[cidx].dequant, + &above_ec, &left_ec, TX_8X8); + a[0] = a[1] = above_ec; + l[0] = l[1] = left_ec; + } +} + +void vp9_optimize_sb64uv_4x4(MACROBLOCK *x) { + ENTROPY_CONTEXT_PLANES t_above[4], t_left[4]; + ENTROPY_CONTEXT *ta = (ENTROPY_CONTEXT *) t_above; + ENTROPY_CONTEXT *tl = (ENTROPY_CONTEXT *) t_left; + ENTROPY_CONTEXT *a, *l; + int b; + + vpx_memcpy(t_above, x->e_mbd.above_context, sizeof(t_above)); + vpx_memcpy(t_left, x->e_mbd.left_context, sizeof(t_left)); + for (b = 256; b < 384; b++) { + const int cidx = b >= 320 ? 
20 : 16; + a = ta + vp9_block2above_sb64[TX_4X4][b]; + l = tl + vp9_block2left_sb64[TX_4X4][b]; + optimize_b(x, b, PLANE_TYPE_UV, x->e_mbd.block[cidx].dequant, + a, l, TX_4X4); + } +} + void vp9_fidct_mb(MACROBLOCK *x) { MACROBLOCKD *const xd = &x->e_mbd; TX_SIZE tx_size = xd->mode_info_context->mbmi.txfm_size; diff --git a/vp9/encoder/vp9_encodemb.h b/vp9/encoder/vp9_encodemb.h index 6356df215..917cf8b2a 100644 --- a/vp9/encoder/vp9_encodemb.h +++ b/vp9/encoder/vp9_encodemb.h @@ -35,7 +35,6 @@ void vp9_encode_inter16x16y(MACROBLOCK *x, int mb_row, int mb_col); void vp9_transform_mb_8x8(MACROBLOCK *mb); void vp9_transform_mby_8x8(MACROBLOCK *x); void vp9_transform_mbuv_8x8(MACROBLOCK *x); -void vp9_build_dcblock_8x8(MACROBLOCK *b); void vp9_optimize_mby_8x8(MACROBLOCK *x); void vp9_optimize_mbuv_8x8(MACROBLOCK *x); @@ -44,7 +43,36 @@ void vp9_transform_mby_16x16(MACROBLOCK *x); void vp9_optimize_mby_16x16(MACROBLOCK *x); void vp9_transform_sby_32x32(MACROBLOCK *x); +void vp9_optimize_sby_32x32(MACROBLOCK *x); +void vp9_transform_sby_16x16(MACROBLOCK *x); +void vp9_optimize_sby_16x16(MACROBLOCK *x); +void vp9_transform_sby_8x8(MACROBLOCK *x); +void vp9_optimize_sby_8x8(MACROBLOCK *x); +void vp9_transform_sby_4x4(MACROBLOCK *x); +void vp9_optimize_sby_4x4(MACROBLOCK *x); void vp9_transform_sbuv_16x16(MACROBLOCK *x); +void vp9_optimize_sbuv_16x16(MACROBLOCK *x); +void vp9_transform_sbuv_8x8(MACROBLOCK *x); +void vp9_optimize_sbuv_8x8(MACROBLOCK *x); +void vp9_transform_sbuv_4x4(MACROBLOCK *x); +void vp9_optimize_sbuv_4x4(MACROBLOCK *x); + +void vp9_transform_sb64y_32x32(MACROBLOCK *x); +void vp9_optimize_sb64y_32x32(MACROBLOCK *x); +void vp9_transform_sb64y_16x16(MACROBLOCK *x); +void vp9_optimize_sb64y_16x16(MACROBLOCK *x); +void vp9_transform_sb64y_8x8(MACROBLOCK *x); +void vp9_optimize_sb64y_8x8(MACROBLOCK *x); +void vp9_transform_sb64y_4x4(MACROBLOCK *x); +void vp9_optimize_sb64y_4x4(MACROBLOCK *x); +void vp9_transform_sb64uv_32x32(MACROBLOCK *x); +void 
vp9_optimize_sb64uv_32x32(MACROBLOCK *x); +void vp9_transform_sb64uv_16x16(MACROBLOCK *x); +void vp9_optimize_sb64uv_16x16(MACROBLOCK *x); +void vp9_transform_sb64uv_8x8(MACROBLOCK *x); +void vp9_optimize_sb64uv_8x8(MACROBLOCK *x); +void vp9_transform_sb64uv_4x4(MACROBLOCK *x); +void vp9_optimize_sb64uv_4x4(MACROBLOCK *x); void vp9_fidct_mb(MACROBLOCK *x); @@ -63,5 +91,11 @@ void vp9_subtract_sbuv_s_c(int16_t *diff, const uint8_t *usrc, const uint8_t *vsrc, int src_stride, const uint8_t *upred, const uint8_t *vpred, int dst_stride); +void vp9_subtract_sb64y_s_c(int16_t *diff, const uint8_t *src, int src_stride, + const uint8_t *pred, int dst_stride); +void vp9_subtract_sb64uv_s_c(int16_t *diff, const uint8_t *usrc, + const uint8_t *vsrc, int src_stride, + const uint8_t *upred, + const uint8_t *vpred, int dst_stride); #endif // VP9_ENCODER_VP9_ENCODEMB_H_ diff --git a/vp9/encoder/vp9_onyx_int.h b/vp9/encoder/vp9_onyx_int.h index 02a371964..50780d085 100644 --- a/vp9/encoder/vp9_onyx_int.h +++ b/vp9/encoder/vp9_onyx_int.h @@ -92,7 +92,7 @@ typedef struct { vp9_coeff_probs coef_probs_4x4[BLOCK_TYPES]; vp9_coeff_probs coef_probs_8x8[BLOCK_TYPES]; vp9_coeff_probs coef_probs_16x16[BLOCK_TYPES]; - vp9_coeff_probs coef_probs_32x32[BLOCK_TYPES_32X32]; + vp9_coeff_probs coef_probs_32x32[BLOCK_TYPES]; vp9_prob sb_ymode_prob[VP9_I32X32_MODES - 1]; vp9_prob ymode_prob[VP9_YMODES - 1]; /* interframe intra mode probs */ @@ -476,9 +476,9 @@ typedef struct VP9_COMP { vp9_coeff_probs frame_coef_probs_16x16[BLOCK_TYPES]; vp9_coeff_stats frame_branch_ct_16x16[BLOCK_TYPES]; - vp9_coeff_count coef_counts_32x32[BLOCK_TYPES_32X32]; - vp9_coeff_probs frame_coef_probs_32x32[BLOCK_TYPES_32X32]; - vp9_coeff_stats frame_branch_ct_32x32[BLOCK_TYPES_32X32]; + vp9_coeff_count coef_counts_32x32[BLOCK_TYPES]; + vp9_coeff_probs frame_coef_probs_32x32[BLOCK_TYPES]; + vp9_coeff_stats frame_branch_ct_32x32[BLOCK_TYPES]; int gfu_boost; int last_boost; diff --git a/vp9/encoder/vp9_quantize.c 
b/vp9/encoder/vp9_quantize.c index 399e8ecda..75f22fac0 100644 --- a/vp9/encoder/vp9_quantize.c +++ b/vp9/encoder/vp9_quantize.c @@ -344,39 +344,301 @@ void vp9_regular_quantize_b_16x16(MACROBLOCK *mb, int b_idx) { } void vp9_quantize_sby_32x32(MACROBLOCK *x) { - MACROBLOCKD *xd = &x->e_mbd; - BLOCK *b = &x->block[0]; - BLOCKD *d = &xd->block[0]; + MACROBLOCKD *const xd = &x->e_mbd; + BLOCK *const b = &x->block[0]; + BLOCKD *const d = &xd->block[0]; quantize(b->zrun_zbin_boost, - x->sb_coeff_data.coeff, + x->coeff, 1024, b->skip_block, b->zbin, b->round, b->quant, b->quant_shift, - xd->sb_coeff_data.qcoeff, - xd->sb_coeff_data.dqcoeff, + xd->qcoeff, + xd->dqcoeff, d->dequant, b->zbin_extra, &xd->eobs[0], vp9_default_zig_zag1d_32x32, 2); } +void vp9_quantize_sby_16x16(MACROBLOCK *x) { + MACROBLOCKD *const xd = &x->e_mbd; + BLOCK *const b = &x->block[0]; + BLOCKD *const d = &xd->block[0]; + int n; + + for (n = 0; n < 4; n++) + quantize(b->zrun_zbin_boost, + x->coeff + n * 256, + 256, b->skip_block, + b->zbin, + b->round, b->quant, b->quant_shift, + xd->qcoeff + n * 256, + xd->dqcoeff + n * 256, + d->dequant, + b->zbin_extra, + &xd->eobs[n * 16], + vp9_default_zig_zag1d_16x16, 1); +} + +void vp9_quantize_sby_8x8(MACROBLOCK *x) { + MACROBLOCKD *const xd = &x->e_mbd; + BLOCK *const b = &x->block[0]; + BLOCKD *const d = &xd->block[0]; + int n; + + for (n = 0; n < 16; n++) + quantize(b->zrun_zbin_boost, + x->coeff + n * 64, + 64, b->skip_block, + b->zbin, + b->round, b->quant, b->quant_shift, + xd->qcoeff + n * 64, + xd->dqcoeff + n * 64, + d->dequant, + b->zbin_extra, + &xd->eobs[n * 4], + vp9_default_zig_zag1d_8x8, 1); +} + +void vp9_quantize_sby_4x4(MACROBLOCK *x) { + MACROBLOCKD *const xd = &x->e_mbd; + BLOCK *const b = &x->block[0]; + BLOCKD *const d = &xd->block[0]; + int n; + + for (n = 0; n < 64; n++) + quantize(b->zrun_zbin_boost, + x->coeff + n * 16, + 16, b->skip_block, + b->zbin, + b->round, b->quant, b->quant_shift, + xd->qcoeff + n * 16, + xd->dqcoeff + n * 
16, + d->dequant, + b->zbin_extra, + &xd->eobs[n], + vp9_default_zig_zag1d_4x4, 1); +} + void vp9_quantize_sbuv_16x16(MACROBLOCK *x) { int i; - MACROBLOCKD *xd = &x->e_mbd; + MACROBLOCKD *const xd = &x->e_mbd; - for (i = 16; i < 24; i += 4) - quantize(x->block[i].zrun_zbin_boost, - x->sb_coeff_data.coeff + 1024 + (i - 16) * 64, - 256, x->block[i].skip_block, - x->block[i].zbin, - x->block[i].round, x->block[0].quant, x->block[i].quant_shift, - xd->sb_coeff_data.qcoeff + 1024 + (i - 16) * 64, - xd->sb_coeff_data.dqcoeff + 1024 + (i - 16) * 64, - xd->block[i].dequant, - x->block[i].zbin_extra, + for (i = 64; i < 96; i += 16) { + int cidx = i < 80 ? 16 : 20; + quantize(x->block[cidx].zrun_zbin_boost, + x->coeff + i * 16, + 256, x->block[cidx].skip_block, + x->block[cidx].zbin, x->block[cidx].round, + x->block[cidx].quant, x->block[cidx].quant_shift, + xd->qcoeff + i * 16, + xd->dqcoeff + i * 16, + xd->block[cidx].dequant, + x->block[cidx].zbin_extra, &xd->eobs[i], vp9_default_zig_zag1d_16x16, 1); + } +} + +void vp9_quantize_sbuv_8x8(MACROBLOCK *x) { + int i; + MACROBLOCKD *const xd = &x->e_mbd; + + for (i = 64; i < 96; i += 4) { + int cidx = i < 80 ? 16 : 20; + quantize(x->block[cidx].zrun_zbin_boost, + x->coeff + i * 16, + 64, x->block[cidx].skip_block, + x->block[cidx].zbin, x->block[cidx].round, + x->block[cidx].quant, x->block[cidx].quant_shift, + xd->qcoeff + i * 16, + xd->dqcoeff + i * 16, + xd->block[cidx].dequant, + x->block[cidx].zbin_extra, + &xd->eobs[i], + vp9_default_zig_zag1d_8x8, 1); + } +} + +void vp9_quantize_sbuv_4x4(MACROBLOCK *x) { + int i; + MACROBLOCKD *const xd = &x->e_mbd; + + for (i = 64; i < 96; i++) { + int cidx = i < 80 ? 
16 : 20; + quantize(x->block[cidx].zrun_zbin_boost, + x->coeff + i * 16, + 16, x->block[cidx].skip_block, + x->block[cidx].zbin, x->block[cidx].round, + x->block[cidx].quant, x->block[cidx].quant_shift, + xd->qcoeff + i * 16, + xd->dqcoeff + i * 16, + xd->block[cidx].dequant, + x->block[cidx].zbin_extra, + &xd->eobs[i], + vp9_default_zig_zag1d_4x4, 1); + } +} + +void vp9_quantize_sb64y_32x32(MACROBLOCK *x) { + MACROBLOCKD *const xd = &x->e_mbd; + BLOCK *const b = &x->block[0]; + BLOCKD *const d = &xd->block[0]; + int n; + + for (n = 0; n < 4; n++) + quantize(b->zrun_zbin_boost, + x->coeff + n * 1024, + 1024, b->skip_block, + b->zbin, + b->round, b->quant, b->quant_shift, + xd->qcoeff + n * 1024, + xd->dqcoeff + n * 1024, + d->dequant, + b->zbin_extra, + &xd->eobs[n * 64], + vp9_default_zig_zag1d_32x32, 2); +} + +void vp9_quantize_sb64y_16x16(MACROBLOCK *x) { + MACROBLOCKD *const xd = &x->e_mbd; + BLOCK *const b = &x->block[0]; + BLOCKD *const d = &xd->block[0]; + int n; + + for (n = 0; n < 16; n++) + quantize(b->zrun_zbin_boost, + x->coeff + n * 256, + 256, b->skip_block, + b->zbin, + b->round, b->quant, b->quant_shift, + xd->qcoeff + n * 256, + xd->dqcoeff + n * 256, + d->dequant, + b->zbin_extra, + &xd->eobs[n * 16], + vp9_default_zig_zag1d_16x16, 1); +} + +void vp9_quantize_sb64y_8x8(MACROBLOCK *x) { + MACROBLOCKD *const xd = &x->e_mbd; + BLOCK *const b = &x->block[0]; + BLOCKD *const d = &xd->block[0]; + int n; + + for (n = 0; n < 64; n++) + quantize(b->zrun_zbin_boost, + x->coeff + n * 64, + 64, b->skip_block, + b->zbin, + b->round, b->quant, b->quant_shift, + xd->qcoeff + n * 64, + xd->dqcoeff + n * 64, + d->dequant, + b->zbin_extra, + &xd->eobs[n * 4], + vp9_default_zig_zag1d_8x8, 1); +} + +void vp9_quantize_sb64y_4x4(MACROBLOCK *x) { + MACROBLOCKD *const xd = &x->e_mbd; + BLOCK *const b = &x->block[0]; + BLOCKD *const d = &xd->block[0]; + int n; + + for (n = 0; n < 256; n++) + quantize(b->zrun_zbin_boost, + x->coeff + n * 16, + 16, b->skip_block, + b->zbin, 
+ b->round, b->quant, b->quant_shift, + xd->qcoeff + n * 16, + xd->dqcoeff + n * 16, + d->dequant, + b->zbin_extra, + &xd->eobs[n], + vp9_default_zig_zag1d_4x4, 1); +} + +void vp9_quantize_sb64uv_32x32(MACROBLOCK *x) { + int i; + MACROBLOCKD *const xd = &x->e_mbd; + + for (i = 256; i < 384; i += 64) { + int cidx = i < 320 ? 16 : 20; + quantize(x->block[cidx].zrun_zbin_boost, + x->coeff + i * 16, + 1024, x->block[cidx].skip_block, + x->block[cidx].zbin, x->block[cidx].round, + x->block[cidx].quant, x->block[cidx].quant_shift, + xd->qcoeff + i * 16, + xd->dqcoeff + i * 16, + xd->block[cidx].dequant, + x->block[cidx].zbin_extra, + &xd->eobs[i], + vp9_default_zig_zag1d_32x32, 2); + } +} + +void vp9_quantize_sb64uv_16x16(MACROBLOCK *x) { + int i; + MACROBLOCKD *const xd = &x->e_mbd; + + for (i = 256; i < 384; i += 16) { + int cidx = i < 320 ? 16 : 20; + quantize(x->block[cidx].zrun_zbin_boost, + x->coeff + i * 16, + 256, x->block[cidx].skip_block, + x->block[cidx].zbin, x->block[cidx].round, + x->block[cidx].quant, x->block[cidx].quant_shift, + xd->qcoeff + i * 16, + xd->dqcoeff + i * 16, + xd->block[cidx].dequant, + x->block[cidx].zbin_extra, + &xd->eobs[i], + vp9_default_zig_zag1d_16x16, 1); + } +} + +void vp9_quantize_sb64uv_8x8(MACROBLOCK *x) { + int i; + MACROBLOCKD *const xd = &x->e_mbd; + + for (i = 256; i < 384; i += 4) { + int cidx = i < 320 ? 16 : 20; + quantize(x->block[cidx].zrun_zbin_boost, + x->coeff + i * 16, + 64, x->block[cidx].skip_block, + x->block[cidx].zbin, x->block[cidx].round, + x->block[cidx].quant, x->block[cidx].quant_shift, + xd->qcoeff + i * 16, + xd->dqcoeff + i * 16, + xd->block[cidx].dequant, + x->block[cidx].zbin_extra, + &xd->eobs[i], + vp9_default_zig_zag1d_8x8, 1); + } +} + +void vp9_quantize_sb64uv_4x4(MACROBLOCK *x) { + int i; + MACROBLOCKD *const xd = &x->e_mbd; + + for (i = 256; i < 384; i++) { + int cidx = i < 320 ? 
16 : 20; + quantize(x->block[cidx].zrun_zbin_boost, + x->coeff + i * 16, + 16, x->block[cidx].skip_block, + x->block[cidx].zbin, x->block[cidx].round, + x->block[cidx].quant, x->block[cidx].quant_shift, + xd->qcoeff + i * 16, + xd->dqcoeff + i * 16, + xd->block[cidx].dequant, + x->block[cidx].zbin_extra, + &xd->eobs[i], + vp9_default_zig_zag1d_4x4, 1); + } } /* quantize_b_pair function pointer in MACROBLOCK structure is set to one of diff --git a/vp9/encoder/vp9_quantize.h b/vp9/encoder/vp9_quantize.h index d338e620a..32eb05a11 100644 --- a/vp9/encoder/vp9_quantize.h +++ b/vp9/encoder/vp9_quantize.h @@ -74,7 +74,21 @@ extern prototype_quantize_block(vp9_quantize_quantb_16x16); extern prototype_quantize_mb(vp9_quantize_mby_16x16); void vp9_quantize_sby_32x32(MACROBLOCK *x); +void vp9_quantize_sby_16x16(MACROBLOCK *x); +void vp9_quantize_sby_8x8(MACROBLOCK *x); +void vp9_quantize_sby_4x4(MACROBLOCK *x); void vp9_quantize_sbuv_16x16(MACROBLOCK *x); +void vp9_quantize_sbuv_8x8(MACROBLOCK *x); +void vp9_quantize_sbuv_4x4(MACROBLOCK *x); + +void vp9_quantize_sb64y_32x32(MACROBLOCK *x); +void vp9_quantize_sb64y_16x16(MACROBLOCK *x); +void vp9_quantize_sb64y_8x8(MACROBLOCK *x); +void vp9_quantize_sb64y_4x4(MACROBLOCK *x); +void vp9_quantize_sb64uv_32x32(MACROBLOCK *x); +void vp9_quantize_sb64uv_16x16(MACROBLOCK *x); +void vp9_quantize_sb64uv_8x8(MACROBLOCK *x); +void vp9_quantize_sb64uv_4x4(MACROBLOCK *x); struct VP9_COMP; diff --git a/vp9/encoder/vp9_rdopt.c b/vp9/encoder/vp9_rdopt.c index 59e33a464..c5b3e3a16 100644 --- a/vp9/encoder/vp9_rdopt.c +++ b/vp9/encoder/vp9_rdopt.c @@ -275,7 +275,7 @@ void vp9_initialize_rd_consts(VP9_COMP *cpi, int QIndex) { fill_token_costs(cpi->mb.token_costs[TX_16X16], cpi->common.fc.coef_probs_16x16, BLOCK_TYPES); fill_token_costs(cpi->mb.token_costs[TX_32X32], - cpi->common.fc.coef_probs_32x32, BLOCK_TYPES_32X32); + cpi->common.fc.coef_probs_32x32, BLOCK_TYPES); /*rough estimate for costing*/ cpi->common.kf_ymode_probs_index = 
cpi->common.base_qindex >> 4; @@ -382,25 +382,27 @@ int vp9_uvsse(MACROBLOCK *x) { } static INLINE int cost_coeffs(MACROBLOCK *mb, - BLOCKD *b, PLANE_TYPE type, + int ib, PLANE_TYPE type, ENTROPY_CONTEXT *a, ENTROPY_CONTEXT *l, TX_SIZE tx_size) { - int pt; MACROBLOCKD *const xd = &mb->e_mbd; - const int ib = (int)(b - xd->block); + MB_MODE_INFO *mbmi = &xd->mode_info_context->mbmi; + const BLOCK_SIZE_TYPE sb_type = mbmi->sb_type; + int pt; const int eob = xd->eobs[ib]; int c = 0; int cost = 0, seg_eob; - const int segment_id = xd->mode_info_context->mbmi.segment_id; + const int segment_id = mbmi->segment_id; const int *scan; - int16_t *qcoeff_ptr = b->qcoeff; - const int ref = xd->mode_info_context->mbmi.ref_frame != INTRA_FRAME; - const TX_TYPE tx_type = (type == PLANE_TYPE_Y_WITH_DC) ? - get_tx_type(xd, b) : DCT_DCT; + const int16_t *qcoeff_ptr = xd->qcoeff + ib * 16; + const int ref = mbmi->ref_frame != INTRA_FRAME; + const TX_TYPE tx_type = (sb_type == BLOCK_SIZE_MB16X16 && + type == PLANE_TYPE_Y_WITH_DC) ? 
+ get_tx_type(xd, &xd->block[ib]) : DCT_DCT; unsigned int (*token_costs)[PREV_COEF_CONTEXTS][MAX_ENTROPY_TOKENS] = mb->token_costs[tx_size][type][ref]; - ENTROPY_CONTEXT a_ec = *a, l_ec = *l; + ENTROPY_CONTEXT a_ec, l_ec; ENTROPY_CONTEXT *const a1 = a + sizeof(ENTROPY_CONTEXT_PLANES)/sizeof(ENTROPY_CONTEXT); ENTROPY_CONTEXT *const l1 = l + @@ -408,6 +410,8 @@ static INLINE int cost_coeffs(MACROBLOCK *mb, switch (tx_size) { case TX_4X4: + a_ec = *a; + l_ec = *l; scan = vp9_default_zig_zag1d_4x4; seg_eob = 16; if (type == PLANE_TYPE_Y_WITH_DC) { @@ -428,8 +432,6 @@ static INLINE int cost_coeffs(MACROBLOCK *mb, scan = vp9_default_zig_zag1d_16x16; seg_eob = 256; if (type == PLANE_TYPE_UV) { - const int uv_idx = ib - 16; - qcoeff_ptr = xd->sb_coeff_data.qcoeff + 1024 + 64 * uv_idx; a_ec = (a[0] + a[1] + a1[0] + a1[1]) != 0; l_ec = (l[0] + l[1] + l1[0] + l1[1]) != 0; } else { @@ -440,11 +442,22 @@ static INLINE int cost_coeffs(MACROBLOCK *mb, case TX_32X32: scan = vp9_default_zig_zag1d_32x32; seg_eob = 1024; - qcoeff_ptr = xd->sb_coeff_data.qcoeff; - a_ec = (a[0] + a[1] + a[2] + a[3] + - a1[0] + a1[1] + a1[2] + a1[3]) != 0; - l_ec = (l[0] + l[1] + l[2] + l[3] + - l1[0] + l1[1] + l1[2] + l1[3]) != 0; + if (type == PLANE_TYPE_UV) { + ENTROPY_CONTEXT *a2, *a3, *l2, *l3; + a2 = a1 + sizeof(ENTROPY_CONTEXT_PLANES) / sizeof(ENTROPY_CONTEXT); + a3 = a2 + sizeof(ENTROPY_CONTEXT_PLANES) / sizeof(ENTROPY_CONTEXT); + l2 = l1 + sizeof(ENTROPY_CONTEXT_PLANES) / sizeof(ENTROPY_CONTEXT); + l3 = l2 + sizeof(ENTROPY_CONTEXT_PLANES) / sizeof(ENTROPY_CONTEXT); + a_ec = (a[0] + a[1] + a1[0] + a1[1] + + a2[0] + a2[1] + a3[0] + a3[1]) != 0; + l_ec = (l[0] + l[1] + l1[0] + l1[1] + + l2[0] + l2[1] + l3[0] + l3[1]) != 0; + } else { + a_ec = (a[0] + a[1] + a[2] + a[3] + + a1[0] + a1[1] + a1[2] + a1[3]) != 0; + l_ec = (l[0] + l[1] + l[2] + l[3] + + l1[0] + l1[1] + l1[2] + l1[3]) != 0; + } break; default: abort(); @@ -510,7 +523,7 @@ static int rdcost_mby_4x4(MACROBLOCK *mb, int backup) { } for (b 
= 0; b < 16; b++) - cost += cost_coeffs(mb, xd->block + b, PLANE_TYPE_Y_WITH_DC, + cost += cost_coeffs(mb, b, PLANE_TYPE_Y_WITH_DC, ta + vp9_block2above[TX_4X4][b], tl + vp9_block2left[TX_4X4][b], TX_4X4); @@ -553,7 +566,7 @@ static int rdcost_mby_8x8(MACROBLOCK *mb, int backup) { } for (b = 0; b < 16; b += 4) - cost += cost_coeffs(mb, xd->block + b, PLANE_TYPE_Y_WITH_DC, + cost += cost_coeffs(mb, b, PLANE_TYPE_Y_WITH_DC, ta + vp9_block2above[TX_8X8][b], tl + vp9_block2left[TX_8X8][b], TX_8X8); @@ -593,7 +606,7 @@ static int rdcost_mby_16x16(MACROBLOCK *mb, int backup) { tl = (ENTROPY_CONTEXT *)xd->left_context; } - cost = cost_coeffs(mb, xd->block, PLANE_TYPE_Y_WITH_DC, ta, tl, TX_16X16); + cost = cost_coeffs(mb, 0, PLANE_TYPE_Y_WITH_DC, ta, tl, TX_16X16); return cost; } @@ -743,7 +756,7 @@ static int rdcost_sby_32x32(MACROBLOCK *x, int backup) { tl = (ENTROPY_CONTEXT *) xd->left_context; } - return cost_coeffs(x, xd->block, PLANE_TYPE_Y_WITH_DC, ta, tl, TX_32X32); + return cost_coeffs(x, 0, PLANE_TYPE_Y_WITH_DC, ta, tl, TX_32X32); } static int vp9_sb_block_error_c(int16_t *coeff, int16_t *dqcoeff, @@ -763,9 +776,7 @@ static int vp9_sb_block_error_c(int16_t *coeff, int16_t *dqcoeff, static void super_block_yrd_32x32(MACROBLOCK *x, int *rate, int *distortion, int *skippable, int backup) { - SUPERBLOCK * const x_sb = &x->sb_coeff_data; - MACROBLOCKD * const xd = &x->e_mbd; - SUPERBLOCKD * const xd_sb = &xd->sb_coeff_data; + MACROBLOCKD *const xd = &x->e_mbd; #if DEBUG_ERROR int16_t out[1024]; #endif @@ -773,17 +784,17 @@ static void super_block_yrd_32x32(MACROBLOCK *x, vp9_transform_sby_32x32(x); vp9_quantize_sby_32x32(x); #if DEBUG_ERROR - vp9_short_idct32x32(xd_sb->dqcoeff, out, 64); + vp9_short_idct32x32(xd->dqcoeff, out, 64); #endif - *distortion = vp9_sb_block_error_c(x_sb->coeff, xd_sb->dqcoeff, 1024); + *distortion = vp9_sb_block_error_c(x->coeff, xd->dqcoeff, 1024); #if DEBUG_ERROR printf("IDCT/FDCT error 32x32: %d (d: %d)\n", - 
vp9_block_error_c(x_sb->src_diff, out, 1024), *distortion); + vp9_block_error_c(x->src_diff, out, 1024), *distortion); #endif *rate = rdcost_sby_32x32(x, backup); - *skippable = vp9_sby_is_skippable_32x32(&x->e_mbd); + *skippable = vp9_sby_is_skippable_32x32(xd); } static void super_block_yrd(VP9_COMP *cpi, @@ -807,7 +818,7 @@ static void super_block_yrd(VP9_COMP *cpi, s[n] = 1; } - vp9_subtract_sby_s_c(x->sb_coeff_data.src_diff, src, src_y_stride, + vp9_subtract_sby_s_c(x->src_diff, src, src_y_stride, dst, dst_y_stride); super_block_yrd_32x32(x, &r[TX_32X32][0], &d[TX_32X32], &s[TX_32X32], 1); @@ -896,7 +907,7 @@ static void super_block_64_yrd(VP9_COMP *cpi, xd->above_context = &t_above[TX_32X32][x_idx << 1]; xd->left_context = &t_left[TX_32X32][y_idx << 1]; - vp9_subtract_sby_s_c(x->sb_coeff_data.src_diff, + vp9_subtract_sby_s_c(x->src_diff, src + 32 * x_idx + 32 * y_idx * src_y_stride, src_y_stride, dst + 32 * x_idx + 32 * y_idx * dst_y_stride, @@ -1051,7 +1062,8 @@ static int64_t rd_pick_intra4x4block(VP9_COMP *cpi, MACROBLOCK *x, BLOCK *be, tempa = ta; templ = tl; - ratey = cost_coeffs(x, b, PLANE_TYPE_Y_WITH_DC, &tempa, &templ, TX_4X4); + ratey = cost_coeffs(x, b - xd->block, + PLANE_TYPE_Y_WITH_DC, &tempa, &templ, TX_4X4); rate += ratey; distortion = vp9_block_error(be->coeff, b->dqcoeff, 16) >> 2; @@ -1355,7 +1367,7 @@ static int64_t rd_pick_intra8x8block(VP9_COMP *cpi, MACROBLOCK *x, int ib, ta1 = ta0 + 1; tl1 = tl0 + 1; - rate_t = cost_coeffs(x, xd->block + idx, PLANE_TYPE_Y_WITH_DC, + rate_t = cost_coeffs(x, idx, PLANE_TYPE_Y_WITH_DC, ta0, tl0, TX_8X8); rate += rate_t; @@ -1388,12 +1400,12 @@ static int64_t rd_pick_intra8x8block(VP9_COMP *cpi, MACROBLOCK *x, int ib, x->quantize_b_4x4(x, ib + iblock[i]); } distortion += vp9_block_error_c(be->coeff, b->dqcoeff, 16 << do_two); - rate_t += cost_coeffs(x, b, PLANE_TYPE_Y_WITH_DC, + rate_t += cost_coeffs(x, ib + iblock[i], PLANE_TYPE_Y_WITH_DC, i&1 ? ta1 : ta0, i&2 ? 
tl1 : tl0, TX_4X4); if (do_two) { i++; - rate_t += cost_coeffs(x, b + 1, PLANE_TYPE_Y_WITH_DC, + rate_t += cost_coeffs(x, ib + iblock[i], PLANE_TYPE_Y_WITH_DC, i&1 ? ta1 : ta0, i&2 ? tl1 : tl0, TX_4X4); } @@ -1500,7 +1512,7 @@ static int rd_cost_mbuv_4x4(MACROBLOCK *mb, int backup) { } for (b = 16; b < 24; b++) - cost += cost_coeffs(mb, xd->block + b, PLANE_TYPE_UV, + cost += cost_coeffs(mb, b, PLANE_TYPE_UV, ta + vp9_block2above[TX_4X4][b], tl + vp9_block2left[TX_4X4][b], TX_4X4); @@ -1541,7 +1553,7 @@ static int rd_cost_mbuv_8x8(MACROBLOCK *mb, int backup) { } for (b = 16; b < 24; b += 4) - cost += cost_coeffs(mb, xd->block + b, PLANE_TYPE_UV, + cost += cost_coeffs(mb, b, PLANE_TYPE_UV, ta + vp9_block2above[TX_8X8][b], tl + vp9_block2left[TX_8X8][b], TX_8X8); @@ -1580,7 +1592,7 @@ static int rd_cost_sbuv_16x16(MACROBLOCK *x, int backup) { } for (b = 16; b < 24; b += 4) - cost += cost_coeffs(x, xd->block + b, PLANE_TYPE_UV, + cost += cost_coeffs(x, b * 4, PLANE_TYPE_UV, ta + vp9_block2above[TX_8X8][b], tl + vp9_block2left[TX_8X8][b], TX_16X16); @@ -1596,8 +1608,8 @@ static void rd_inter32x32_uv_16x16(MACROBLOCK *x, int *rate, vp9_quantize_sbuv_16x16(x); *rate = rd_cost_sbuv_16x16(x, backup); - *distortion = vp9_block_error_c(x->sb_coeff_data.coeff + 1024, - xd->sb_coeff_data.dqcoeff + 1024, 512) >> 2; + *distortion = vp9_block_error_c(x->coeff + 1024, + xd->dqcoeff + 1024, 512) >> 2; *skip = vp9_sbuv_is_skippable_16x16(xd); } @@ -1609,8 +1621,8 @@ static int64_t rd_inter32x32_uv(VP9_COMP *cpi, MACROBLOCK *x, int *rate, const uint8_t *vsrc = x->src.v_buffer, *vdst = xd->dst.v_buffer; int src_uv_stride = x->src.uv_stride, dst_uv_stride = xd->dst.uv_stride; - if (mbmi->txfm_size == TX_32X32) { - vp9_subtract_sbuv_s_c(x->sb_coeff_data.src_diff, + if (mbmi->txfm_size >= TX_16X16) { + vp9_subtract_sbuv_s_c(x->src_diff, usrc, vsrc, src_uv_stride, udst, vdst, dst_uv_stride); rd_inter32x32_uv_16x16(x, rate, distortion, skip, 1); @@ -1789,8 +1801,8 @@ static void 
super_block_uvrd(MACROBLOCK *x, const uint8_t *vsrc = x->src.v_buffer, *vdst = xd->dst.v_buffer; int src_uv_stride = x->src.uv_stride, dst_uv_stride = xd->dst.uv_stride; - if (mbmi->txfm_size == TX_32X32) { - vp9_subtract_sbuv_s_c(x->sb_coeff_data.src_diff, + if (mbmi->txfm_size >= TX_16X16) { + vp9_subtract_sbuv_s_c(x->src_diff, usrc, vsrc, src_uv_stride, udst, vdst, dst_uv_stride); rd_inter32x32_uv_16x16(x, rate, distortion, skippable, 1); @@ -1842,6 +1854,46 @@ static void super_block_uvrd(MACROBLOCK *x, } } +static int rd_cost_sb64uv_32x32(MACROBLOCK *x, int backup) { + int b; + int cost = 0; + MACROBLOCKD *const xd = &x->e_mbd; + ENTROPY_CONTEXT_PLANES t_above[4], t_left[4]; + ENTROPY_CONTEXT *ta, *tl; + + if (backup) { + vpx_memcpy(&t_above, xd->above_context, sizeof(ENTROPY_CONTEXT_PLANES) * 4); + vpx_memcpy(&t_left, xd->left_context, sizeof(ENTROPY_CONTEXT_PLANES) * 4); + + ta = (ENTROPY_CONTEXT *) &t_above; + tl = (ENTROPY_CONTEXT *) &t_left; + } else { + ta = (ENTROPY_CONTEXT *)xd->above_context; + tl = (ENTROPY_CONTEXT *)xd->left_context; + } + + for (b = 16; b < 24; b += 4) + cost += cost_coeffs(x, b * 16, PLANE_TYPE_UV, + ta + vp9_block2above[TX_8X8][b], + tl + vp9_block2left[TX_8X8][b], TX_32X32); + + return cost; +} + +static void rd_inter64x64_uv_32x32(MACROBLOCK *x, int *rate, + int *distortion, int *skip, + int backup) { + MACROBLOCKD *const xd = &x->e_mbd; + + vp9_transform_sb64uv_32x32(x); + vp9_quantize_sb64uv_32x32(x); + + *rate = rd_cost_sb64uv_32x32(x, backup); + *distortion = vp9_block_error_c(x->coeff + 4096, + xd->dqcoeff + 4096, 2048); + *skip = vp9_sb64uv_is_skippable_32x32(xd); +} + static void super_block_64_uvrd(MACROBLOCK *x, int *rate, int *distortion, @@ -1856,10 +1908,15 @@ static void super_block_64_uvrd(MACROBLOCK *x, ENTROPY_CONTEXT_PLANES *tl_orig = xd->left_context; int d = 0, r = 0, n, s = 1; + // FIXME not needed if tx=32x32 memcpy(t_above, xd->above_context, sizeof(t_above)); memcpy(t_left, xd->left_context, 
sizeof(t_left)); if (mbmi->txfm_size == TX_32X32) { + vp9_subtract_sb64uv_s_c(x->src_diff, usrc, vsrc, src_uv_stride, + udst, vdst, dst_uv_stride); + rd_inter64x64_uv_32x32(x, &r, &d, &s, 1); + } else if (mbmi->txfm_size == TX_16X16) { int n; *rate = 0; @@ -1867,7 +1924,7 @@ static void super_block_64_uvrd(MACROBLOCK *x, int x_idx = n & 1, y_idx = n >> 1; int r_tmp, d_tmp, s_tmp; - vp9_subtract_sbuv_s_c(x->sb_coeff_data.src_diff, + vp9_subtract_sbuv_s_c(x->src_diff, usrc + x_idx * 16 + y_idx * 16 * src_uv_stride, vsrc + x_idx * 16 + y_idx * 16 * src_uv_stride, src_uv_stride, @@ -2170,7 +2227,7 @@ static int64_t encode_inter_mb_segment(MACROBLOCK *x, x->quantize_b_4x4(x, i); thisdistortion = vp9_block_error(be->coeff, bd->dqcoeff, 16); *distortion += thisdistortion; - *labelyrate += cost_coeffs(x, bd, PLANE_TYPE_Y_WITH_DC, + *labelyrate += cost_coeffs(x, i, PLANE_TYPE_Y_WITH_DC, ta + vp9_block2above[TX_4X4][i], tl + vp9_block2left[TX_4X4][i], TX_4X4); } @@ -2233,10 +2290,10 @@ static int64_t encode_inter_mb_segment_8x8(MACROBLOCK *x, x->quantize_b_8x8(x, idx); thisdistortion = vp9_block_error_c(be2->coeff, bd2->dqcoeff, 64); otherdist += thisdistortion; - othercost += cost_coeffs(x, bd2, PLANE_TYPE_Y_WITH_DC, - tacp + vp9_block2above[TX_8X8][idx], - tlcp + vp9_block2left[TX_8X8][idx], - TX_8X8); + othercost += cost_coeffs(x, idx, PLANE_TYPE_Y_WITH_DC, + tacp + vp9_block2above[TX_8X8][idx], + tlcp + vp9_block2left[TX_8X8][idx], + TX_8X8); } for (j = 0; j < 4; j += 2) { bd = &xd->block[ib + iblock[j]]; @@ -2245,11 +2302,12 @@ static int64_t encode_inter_mb_segment_8x8(MACROBLOCK *x, x->quantize_b_4x4_pair(x, ib + iblock[j], ib + iblock[j] + 1); thisdistortion = vp9_block_error_c(be->coeff, bd->dqcoeff, 32); *distortion += thisdistortion; - *labelyrate += cost_coeffs(x, bd, PLANE_TYPE_Y_WITH_DC, + *labelyrate += cost_coeffs(x, ib + iblock[j], PLANE_TYPE_Y_WITH_DC, ta + vp9_block2above[TX_4X4][ib + iblock[j]], tl + vp9_block2left[TX_4X4][ib + iblock[j]], TX_4X4); - 
*labelyrate += cost_coeffs(x, bd + 1, PLANE_TYPE_Y_WITH_DC, + *labelyrate += cost_coeffs(x, ib + iblock[j] + 1, + PLANE_TYPE_Y_WITH_DC, ta + vp9_block2above[TX_4X4][ib + iblock[j] + 1], tl + vp9_block2left[TX_4X4][ib + iblock[j]], TX_4X4); @@ -2263,11 +2321,12 @@ static int64_t encode_inter_mb_segment_8x8(MACROBLOCK *x, x->quantize_b_4x4_pair(x, ib + iblock[j], ib + iblock[j]); thisdistortion = vp9_block_error_c(be->coeff, bd->dqcoeff, 32); otherdist += thisdistortion; - othercost += cost_coeffs(x, bd, PLANE_TYPE_Y_WITH_DC, + othercost += cost_coeffs(x, ib + iblock[j], PLANE_TYPE_Y_WITH_DC, tacp + vp9_block2above[TX_4X4][ib + iblock[j]], tlcp + vp9_block2left[TX_4X4][ib + iblock[j]], TX_4X4); - othercost += cost_coeffs(x, bd + 1, PLANE_TYPE_Y_WITH_DC, + othercost += cost_coeffs(x, ib + iblock[j] + 1, + PLANE_TYPE_Y_WITH_DC, tacp + vp9_block2above[TX_4X4][ib + iblock[j] + 1], tlcp + vp9_block2left[TX_4X4][ib + iblock[j]], TX_4X4); @@ -2277,7 +2336,7 @@ static int64_t encode_inter_mb_segment_8x8(MACROBLOCK *x, x->quantize_b_8x8(x, idx); thisdistortion = vp9_block_error_c(be2->coeff, bd2->dqcoeff, 64); *distortion += thisdistortion; - *labelyrate += cost_coeffs(x, bd2, PLANE_TYPE_Y_WITH_DC, + *labelyrate += cost_coeffs(x, idx, PLANE_TYPE_Y_WITH_DC, ta + vp9_block2above[TX_8X8][idx], tl + vp9_block2left[TX_8X8][idx], TX_8X8); } diff --git a/vp9/encoder/vp9_tokenize.c b/vp9/encoder/vp9_tokenize.c index 95a2e1227..d115fe80e 100644 --- a/vp9/encoder/vp9_tokenize.c +++ b/vp9/encoder/vp9_tokenize.c @@ -28,12 +28,12 @@ vp9_coeff_accum context_counters_4x4[BLOCK_TYPES]; vp9_coeff_accum context_counters_8x8[BLOCK_TYPES]; vp9_coeff_accum context_counters_16x16[BLOCK_TYPES]; -vp9_coeff_accum context_counters_32x32[BLOCK_TYPES_32X32]; +vp9_coeff_accum context_counters_32x32[BLOCK_TYPES]; extern vp9_coeff_stats tree_update_hist_4x4[BLOCK_TYPES]; extern vp9_coeff_stats tree_update_hist_8x8[BLOCK_TYPES]; extern vp9_coeff_stats tree_update_hist_16x16[BLOCK_TYPES]; -extern 
vp9_coeff_stats tree_update_hist_32x32[BLOCK_TYPES_32X32]; +extern vp9_coeff_stats tree_update_hist_32x32[BLOCK_TYPES]; #endif /* ENTROPY_STATS */ static TOKENVALUE dct_value_tokens[DCT_MAX_VALUE * 2]; @@ -101,37 +101,52 @@ static void tokenize_b(VP9_COMP *cpi, PLANE_TYPE type, TX_SIZE tx_size, int dry_run) { + MB_MODE_INFO *mbmi = &xd->mode_info_context->mbmi; int pt; /* near block/prev token context index */ int c = 0; int recent_energy = 0; - const BLOCKD * const b = xd->block + ib; const int eob = xd->eobs[ib]; /* one beyond last nonzero coeff */ TOKENEXTRA *t = *tp; /* store tokens starting here */ - int16_t *qcoeff_ptr = b->qcoeff; + int16_t *qcoeff_ptr = xd->qcoeff + 16 * ib; int seg_eob; - const int segment_id = xd->mode_info_context->mbmi.segment_id; + const int segment_id = mbmi->segment_id; + const BLOCK_SIZE_TYPE sb_type = mbmi->sb_type; const int *scan; vp9_coeff_count *counts; vp9_coeff_probs *probs; - const TX_TYPE tx_type = (type == PLANE_TYPE_Y_WITH_DC) ? - get_tx_type(xd, b) : DCT_DCT; - const int ref = xd->mode_info_context->mbmi.ref_frame != INTRA_FRAME; - - ENTROPY_CONTEXT *const a = (ENTROPY_CONTEXT *)xd->above_context + - vp9_block2above[tx_size][ib]; - ENTROPY_CONTEXT *const l = (ENTROPY_CONTEXT *)xd->left_context + - vp9_block2left[tx_size][ib]; - ENTROPY_CONTEXT a_ec = *a, l_ec = *l; - - ENTROPY_CONTEXT *const a1 = (ENTROPY_CONTEXT *)(&xd->above_context[1]) + - vp9_block2above[tx_size][ib]; - ENTROPY_CONTEXT *const l1 = (ENTROPY_CONTEXT *)(&xd->left_context[1]) + - vp9_block2left[tx_size][ib]; + const TX_TYPE tx_type = (sb_type == BLOCK_SIZE_MB16X16 && + type == PLANE_TYPE_Y_WITH_DC) ? 
+ get_tx_type(xd, &xd->block[ib]) : DCT_DCT; + const int ref = mbmi->ref_frame != INTRA_FRAME; + ENTROPY_CONTEXT *a, *l, *a1, *l1, *a2, *l2, *a3, *l3, a_ec, l_ec; + if (sb_type == BLOCK_SIZE_SB64X64) { + a = (ENTROPY_CONTEXT *)xd->above_context + + vp9_block2above_sb64[tx_size][ib]; + l = (ENTROPY_CONTEXT *)xd->left_context + vp9_block2left_sb64[tx_size][ib]; + a1 = a + sizeof(ENTROPY_CONTEXT_PLANES) / sizeof(ENTROPY_CONTEXT); + l1 = l + sizeof(ENTROPY_CONTEXT_PLANES) / sizeof(ENTROPY_CONTEXT); + a2 = a1 + sizeof(ENTROPY_CONTEXT_PLANES) / sizeof(ENTROPY_CONTEXT); + l2 = l1 + sizeof(ENTROPY_CONTEXT_PLANES) / sizeof(ENTROPY_CONTEXT); + a3 = a2 + sizeof(ENTROPY_CONTEXT_PLANES) / sizeof(ENTROPY_CONTEXT); + l3 = l2 + sizeof(ENTROPY_CONTEXT_PLANES) / sizeof(ENTROPY_CONTEXT); + } else if (sb_type == BLOCK_SIZE_SB32X32) { + a = (ENTROPY_CONTEXT *)xd->above_context + vp9_block2above_sb[tx_size][ib]; + l = (ENTROPY_CONTEXT *)xd->left_context + vp9_block2left_sb[tx_size][ib]; + a1 = a + sizeof(ENTROPY_CONTEXT_PLANES) / sizeof(ENTROPY_CONTEXT); + l1 = l + sizeof(ENTROPY_CONTEXT_PLANES) / sizeof(ENTROPY_CONTEXT); + a2 = a3 = l2 = l3 = NULL; + } else { + a = (ENTROPY_CONTEXT *)xd->above_context + vp9_block2above[tx_size][ib]; + l = (ENTROPY_CONTEXT *)xd->left_context + vp9_block2left[tx_size][ib]; + a1 = l1 = a2 = l2 = a3 = l3 = NULL; + } switch (tx_size) { default: case TX_4X4: + a_ec = *a; + l_ec = *l; seg_eob = 16; scan = vp9_default_zig_zag1d_4x4; if (tx_type != DCT_DCT) { @@ -164,23 +179,23 @@ static void tokenize_b(VP9_COMP *cpi, scan = vp9_default_zig_zag1d_16x16; counts = cpi->coef_counts_16x16; probs = cpi->common.fc.coef_probs_16x16; - if (type == PLANE_TYPE_UV) { - int uv_idx = (ib - 16) >> 2; - qcoeff_ptr = xd->sb_coeff_data.qcoeff + 1024 + 256 * uv_idx; - } break; case TX_32X32: - a_ec = a[0] + a[1] + a[2] + a[3] + - a1[0] + a1[1] + a1[2] + a1[3]; - l_ec = l[0] + l[1] + l[2] + l[3] + - l1[0] + l1[1] + l1[2] + l1[3]; - a_ec = a_ec != 0; - l_ec = l_ec != 0; + if (type 
!= PLANE_TYPE_UV) { + a_ec = (a[0] + a[1] + a[2] + a[3] + + a1[0] + a1[1] + a1[2] + a1[3]) != 0; + l_ec = (l[0] + l[1] + l[2] + l[3] + + l1[0] + l1[1] + l1[2] + l1[3]) != 0; + } else { + a_ec = (a[0] + a[1] + a1[0] + a1[1] + + a2[0] + a2[1] + a3[0] + a3[1]) != 0; + l_ec = (l[0] + l[1] + l1[0] + l1[1] + + l2[0] + l2[1] + l3[0] + l3[1]) != 0; + } seg_eob = 1024; scan = vp9_default_zig_zag1d_32x32; counts = cpi->coef_counts_32x32; probs = cpi->common.fc.coef_probs_32x32; - qcoeff_ptr = xd->sb_coeff_data.qcoeff; break; } @@ -233,10 +248,17 @@ static void tokenize_b(VP9_COMP *cpi, l1[0] = l1[1] = l[1] = l_ec; } } else if (tx_size == TX_32X32) { - a[1] = a[2] = a[3] = a_ec; - l[1] = l[2] = l[3] = l_ec; - a1[0] = a1[1] = a1[2] = a1[3] = a_ec; - l1[0] = l1[1] = l1[2] = l1[3] = l_ec; + if (type != PLANE_TYPE_UV) { + a[1] = a[2] = a[3] = a_ec; + l[1] = l[2] = l[3] = l_ec; + a1[0] = a1[1] = a1[2] = a1[3] = a_ec; + l1[0] = l1[1] = l1[2] = l1[3] = l_ec; + } else { + a[1] = a1[0] = a1[1] = a_ec; + l[1] = l1[0] = l1[1] = l_ec; + a2[0] = a2[1] = a3[0] = a3[1] = a_ec; + l2[0] = l2[1] = l3[0] = l3[1] = l_ec; + } } } @@ -289,9 +311,7 @@ static int mb_is_skippable_8x8_4x4uv(MACROBLOCKD *xd) { } int vp9_mby_is_skippable_16x16(MACROBLOCKD *xd) { - int skip = 1; - skip &= !xd->eobs[0]; - return skip; + return (!xd->eobs[0]); } static int mb_is_skippable_16x16(MACROBLOCKD *xd) { @@ -299,13 +319,11 @@ static int mb_is_skippable_16x16(MACROBLOCKD *xd) { } int vp9_sby_is_skippable_32x32(MACROBLOCKD *xd) { - int skip = 1; - skip &= !xd->eobs[0]; - return skip; + return (!xd->eobs[0]); } int vp9_sbuv_is_skippable_16x16(MACROBLOCKD *xd) { - return (!xd->eobs[16]) & (!xd->eobs[20]); + return (!xd->eobs[64]) & (!xd->eobs[80]); } static int sb_is_skippable_32x32(MACROBLOCKD *xd) { @@ -313,6 +331,68 @@ static int sb_is_skippable_32x32(MACROBLOCKD *xd) { vp9_sbuv_is_skippable_16x16(xd); } +static int sby_is_skippable_16x16(MACROBLOCKD *xd) { + int skip = 1; + int i = 0; + + for (i = 0; i < 64; i += 
16) + skip &= (!xd->eobs[i]); + + return skip; +} + +static int sb_is_skippable_16x16(MACROBLOCKD *xd) { + return sby_is_skippable_16x16(xd) & vp9_sbuv_is_skippable_16x16(xd); +} + +static int sby_is_skippable_8x8(MACROBLOCKD *xd) { + int skip = 1; + int i = 0; + + for (i = 0; i < 64; i += 4) + skip &= (!xd->eobs[i]); + + return skip; +} + +static int sbuv_is_skippable_8x8(MACROBLOCKD *xd) { + int skip = 1; + int i = 0; + + for (i = 64; i < 96; i += 4) + skip &= (!xd->eobs[i]); + + return skip; +} + +static int sb_is_skippable_8x8(MACROBLOCKD *xd) { + return sby_is_skippable_8x8(xd) & sbuv_is_skippable_8x8(xd); +} + +static int sby_is_skippable_4x4(MACROBLOCKD *xd) { + int skip = 1; + int i = 0; + + for (i = 0; i < 64; i++) + skip &= (!xd->eobs[i]); + + return skip; +} + +static int sbuv_is_skippable_4x4(MACROBLOCKD *xd) { + int skip = 1; + int i = 0; + + for (i = 64; i < 96; i++) + skip &= (!xd->eobs[i]); + + return skip; +} + +static int sb_is_skippable_4x4(MACROBLOCKD *xd) { + return sby_is_skippable_4x4(xd) & sbuv_is_skippable_4x4(xd); +} + void vp9_tokenize_sb(VP9_COMP *cpi, MACROBLOCKD *xd, TOKENEXTRA **t, @@ -325,7 +405,21 @@ void vp9_tokenize_sb(VP9_COMP *cpi, const int skip_inc = !vp9_segfeature_active(xd, segment_id, SEG_LVL_SKIP); int b; - mbmi->mb_skip_coeff = sb_is_skippable_32x32(xd); + switch (mbmi->txfm_size) { + case TX_32X32: + mbmi->mb_skip_coeff = sb_is_skippable_32x32(xd); + break; + case TX_16X16: + mbmi->mb_skip_coeff = sb_is_skippable_16x16(xd); + break; + case TX_8X8: + mbmi->mb_skip_coeff = sb_is_skippable_8x8(xd); + break; + case TX_4X4: + mbmi->mb_skip_coeff = sb_is_skippable_4x4(xd); + break; + default: assert(0); + } if (mbmi->mb_skip_coeff) { if (!dry_run) @@ -333,7 +427,7 @@ void vp9_tokenize_sb(VP9_COMP *cpi, if (!cm->mb_no_coeff_skip) { vp9_stuff_sb(cpi, xd, t, dry_run); } else { - vp9_fix_contexts_sb(xd); + vp9_reset_sb_tokens_context(xd); } if (dry_run) *t = t_backup; @@ -343,13 +437,215 @@ void vp9_tokenize_sb(VP9_COMP *cpi, if 
(!dry_run) cpi->skip_false_count[mb_skip_context] += skip_inc; - tokenize_b(cpi, xd, 0, t, PLANE_TYPE_Y_WITH_DC, - TX_32X32, dry_run); - - for (b = 16; b < 24; b += 4) { - tokenize_b(cpi, xd, b, t, PLANE_TYPE_UV, - TX_16X16, dry_run); + switch (mbmi->txfm_size) { + case TX_32X32: + tokenize_b(cpi, xd, 0, t, PLANE_TYPE_Y_WITH_DC, + TX_32X32, dry_run); + for (b = 64; b < 96; b += 16) + tokenize_b(cpi, xd, b, t, PLANE_TYPE_UV, + TX_16X16, dry_run); + break; + case TX_16X16: + for (b = 0; b < 64; b += 16) + tokenize_b(cpi, xd, b, t, PLANE_TYPE_Y_WITH_DC, + TX_16X16, dry_run); + for (b = 64; b < 96; b += 16) + tokenize_b(cpi, xd, b, t, PLANE_TYPE_UV, + TX_16X16, dry_run); + break; + case TX_8X8: + for (b = 0; b < 64; b += 4) + tokenize_b(cpi, xd, b, t, PLANE_TYPE_Y_WITH_DC, + TX_8X8, dry_run); + for (b = 64; b < 96; b += 4) + tokenize_b(cpi, xd, b, t, PLANE_TYPE_UV, + TX_8X8, dry_run); + break; + case TX_4X4: + for (b = 0; b < 64; b++) + tokenize_b(cpi, xd, b, t, PLANE_TYPE_Y_WITH_DC, + TX_4X4, dry_run); + for (b = 64; b < 96; b++) + tokenize_b(cpi, xd, b, t, PLANE_TYPE_UV, + TX_4X4, dry_run); + break; + default: assert(0); } + + if (dry_run) + *t = t_backup; +} + +static int sb64y_is_skippable_32x32(MACROBLOCKD *xd) { + int skip = 1; + int i = 0; + + for (i = 0; i < 256; i += 64) + skip &= (!xd->eobs[i]); + + return skip; +} + +int vp9_sb64uv_is_skippable_32x32(MACROBLOCKD *xd) { + return (!xd->eobs[256]) & (!xd->eobs[320]); +} + +static int sb64_is_skippable_32x32(MACROBLOCKD *xd) { + return sb64y_is_skippable_32x32(xd) & vp9_sb64uv_is_skippable_32x32(xd); +} + +static int sb64y_is_skippable_16x16(MACROBLOCKD *xd) { + int skip = 1; + int i = 0; + + for (i = 0; i < 256; i += 16) + skip &= (!xd->eobs[i]); + + return skip; +} + +static int sb64uv_is_skippable_16x16(MACROBLOCKD *xd) { + int skip = 1; + int i = 0; + + for (i = 256; i < 384; i += 16) + skip &= (!xd->eobs[i]); + + return skip; +} + +static int sb64_is_skippable_16x16(MACROBLOCKD *xd) { + return 
sb64y_is_skippable_16x16(xd) & sb64uv_is_skippable_16x16(xd); +} + +static int sb64y_is_skippable_8x8(MACROBLOCKD *xd) { + int skip = 1; + int i = 0; + + for (i = 0; i < 256; i += 4) + skip &= (!xd->eobs[i]); + + return skip; +} + +static int sb64uv_is_skippable_8x8(MACROBLOCKD *xd) { + int skip = 1; + int i = 0; + + for (i = 256; i < 384; i += 4) + skip &= (!xd->eobs[i]); + + return skip; +} + +static int sb64_is_skippable_8x8(MACROBLOCKD *xd) { + return sb64y_is_skippable_8x8(xd) & sb64uv_is_skippable_8x8(xd); +} + +static int sb64y_is_skippable_4x4(MACROBLOCKD *xd) { + int skip = 1; + int i = 0; + + for (i = 0; i < 256; i++) + skip &= (!xd->eobs[i]); + + return skip; +} + +static int sb64uv_is_skippable_4x4(MACROBLOCKD *xd) { + int skip = 1; + int i = 0; + + for (i = 256; i < 384; i++) + skip &= (!xd->eobs[i]); + + return skip; +} + +static int sb64_is_skippable_4x4(MACROBLOCKD *xd) { + return sb64y_is_skippable_4x4(xd) & sb64uv_is_skippable_4x4(xd); +} + +void vp9_tokenize_sb64(VP9_COMP *cpi, + MACROBLOCKD *xd, + TOKENEXTRA **t, + int dry_run) { + VP9_COMMON * const cm = &cpi->common; + MB_MODE_INFO * const mbmi = &xd->mode_info_context->mbmi; + TOKENEXTRA *t_backup = *t; + const int mb_skip_context = vp9_get_pred_context(cm, xd, PRED_MBSKIP); + const int segment_id = mbmi->segment_id; + const int skip_inc = !vp9_segfeature_active(xd, segment_id, SEG_LVL_SKIP); + int b; + + switch (mbmi->txfm_size) { + case TX_32X32: + mbmi->mb_skip_coeff = sb64_is_skippable_32x32(xd); + break; + case TX_16X16: + mbmi->mb_skip_coeff = sb64_is_skippable_16x16(xd); + break; + case TX_8X8: + mbmi->mb_skip_coeff = sb64_is_skippable_8x8(xd); + break; + case TX_4X4: + mbmi->mb_skip_coeff = sb64_is_skippable_4x4(xd); + break; + default: assert(0); + } + + if (mbmi->mb_skip_coeff) { + if (!dry_run) + cpi->skip_true_count[mb_skip_context] += skip_inc; + if (!cm->mb_no_coeff_skip) { + vp9_stuff_sb64(cpi, xd, t, dry_run); + } else { + vp9_reset_sb64_tokens_context(xd); + } + if (dry_run) 
+ *t = t_backup; + return; + } + + if (!dry_run) + cpi->skip_false_count[mb_skip_context] += skip_inc; + + switch (mbmi->txfm_size) { + case TX_32X32: + for (b = 0; b < 256; b += 64) + tokenize_b(cpi, xd, b, t, PLANE_TYPE_Y_WITH_DC, + TX_32X32, dry_run); + for (b = 256; b < 384; b += 64) + tokenize_b(cpi, xd, b, t, PLANE_TYPE_UV, + TX_32X32, dry_run); + break; + case TX_16X16: + for (b = 0; b < 256; b += 16) + tokenize_b(cpi, xd, b, t, PLANE_TYPE_Y_WITH_DC, + TX_16X16, dry_run); + for (b = 256; b < 384; b += 16) + tokenize_b(cpi, xd, b, t, PLANE_TYPE_UV, + TX_16X16, dry_run); + break; + case TX_8X8: + for (b = 0; b < 256; b += 4) + tokenize_b(cpi, xd, b, t, PLANE_TYPE_Y_WITH_DC, + TX_8X8, dry_run); + for (b = 256; b < 384; b += 4) + tokenize_b(cpi, xd, b, t, PLANE_TYPE_UV, + TX_8X8, dry_run); + break; + case TX_4X4: + for (b = 0; b < 256; b++) + tokenize_b(cpi, xd, b, t, PLANE_TYPE_Y_WITH_DC, + TX_4X4, dry_run); + for (b = 256; b < 384; b++) + tokenize_b(cpi, xd, b, t, PLANE_TYPE_UV, + TX_4X4, dry_run); + break; + default: assert(0); + } + if (dry_run) *t = t_backup; } @@ -567,23 +863,23 @@ void print_context_counters() { /* print counts */ print_counter(f, context_counters_4x4, BLOCK_TYPES, - "vp9_default_coef_counts_4x4[BLOCK_TYPES_4X4]"); + "vp9_default_coef_counts_4x4[BLOCK_TYPES]"); print_counter(f, context_counters_8x8, BLOCK_TYPES, - "vp9_default_coef_counts_8x8[BLOCK_TYPES_8X8]"); + "vp9_default_coef_counts_8x8[BLOCK_TYPES]"); print_counter(f, context_counters_16x16, BLOCK_TYPES, - "vp9_default_coef_counts_16x16[BLOCK_TYPES_16X16]"); - print_counter(f, context_counters_32x32, BLOCK_TYPES_32X32, - "vp9_default_coef_counts_32x32[BLOCK_TYPES_32X32]"); + "vp9_default_coef_counts_16x16[BLOCK_TYPES]"); + print_counter(f, context_counters_32x32, BLOCK_TYPES, + "vp9_default_coef_counts_32x32[BLOCK_TYPES]"); /* print coefficient probabilities */ print_probs(f, context_counters_4x4, BLOCK_TYPES, - "default_coef_probs_4x4[BLOCK_TYPES_4X4]"); + 
"default_coef_probs_4x4[BLOCK_TYPES]"); print_probs(f, context_counters_8x8, BLOCK_TYPES, - "default_coef_probs_8x8[BLOCK_TYPES_8X8]"); + "default_coef_probs_8x8[BLOCK_TYPES]"); print_probs(f, context_counters_16x16, BLOCK_TYPES, - "default_coef_probs_16x16[BLOCK_TYPES_16X16]"); - print_probs(f, context_counters_32x32, BLOCK_TYPES_32X32, - "default_coef_probs_32x32[BLOCK_TYPES_32X32]"); + "default_coef_probs_16x16[BLOCK_TYPES]"); + print_probs(f, context_counters_32x32, BLOCK_TYPES, + "default_coef_probs_32x32[BLOCK_TYPES]"); fclose(f); @@ -600,31 +896,49 @@ void vp9_tokenize_initialize() { fill_value_tokens(); } -static INLINE void stuff_b(VP9_COMP *cpi, - MACROBLOCKD *xd, - const int ib, - TOKENEXTRA **tp, - PLANE_TYPE type, - TX_SIZE tx_size, - int dry_run) { +static void stuff_b(VP9_COMP *cpi, + MACROBLOCKD *xd, + const int ib, + TOKENEXTRA **tp, + PLANE_TYPE type, + TX_SIZE tx_size, + int dry_run) { vp9_coeff_count *counts; vp9_coeff_probs *probs; int pt, band; TOKENEXTRA *t = *tp; - const int ref = xd->mode_info_context->mbmi.ref_frame != INTRA_FRAME; - ENTROPY_CONTEXT *const a = (ENTROPY_CONTEXT *)xd->above_context + - vp9_block2above[tx_size][ib]; - ENTROPY_CONTEXT *const l = (ENTROPY_CONTEXT *)xd->left_context + - vp9_block2left[tx_size][ib]; - ENTROPY_CONTEXT a_ec = *a, l_ec = *l; - ENTROPY_CONTEXT *const a1 = (ENTROPY_CONTEXT *)(&xd->above_context[1]) + - vp9_block2above[tx_size][ib]; - ENTROPY_CONTEXT *const l1 = (ENTROPY_CONTEXT *)(&xd->left_context[1]) + - vp9_block2left[tx_size][ib]; + MB_MODE_INFO *mbmi = &xd->mode_info_context->mbmi; + const int ref = mbmi->ref_frame != INTRA_FRAME; + const BLOCK_SIZE_TYPE sb_type = mbmi->sb_type; + ENTROPY_CONTEXT *a, *l, *a1, *l1, *a2, *l2, *a3, *l3, a_ec, l_ec; + + if (sb_type == BLOCK_SIZE_SB64X64) { + a = (ENTROPY_CONTEXT *)xd->above_context + + vp9_block2above_sb64[tx_size][ib]; + l = (ENTROPY_CONTEXT *)xd->left_context + vp9_block2left_sb64[tx_size][ib]; + a1 = a + sizeof(ENTROPY_CONTEXT_PLANES) /
sizeof(ENTROPY_CONTEXT); + l1 = l + sizeof(ENTROPY_CONTEXT_PLANES) / sizeof(ENTROPY_CONTEXT); + a2 = a1 + sizeof(ENTROPY_CONTEXT_PLANES) / sizeof(ENTROPY_CONTEXT); + l2 = l1 + sizeof(ENTROPY_CONTEXT_PLANES) / sizeof(ENTROPY_CONTEXT); + a3 = a2 + sizeof(ENTROPY_CONTEXT_PLANES) / sizeof(ENTROPY_CONTEXT); + l3 = l2 + sizeof(ENTROPY_CONTEXT_PLANES) / sizeof(ENTROPY_CONTEXT); + } else if (sb_type == BLOCK_SIZE_SB32X32) { + a = (ENTROPY_CONTEXT *)xd->above_context + vp9_block2above_sb[tx_size][ib]; + l = (ENTROPY_CONTEXT *)xd->left_context + vp9_block2left_sb[tx_size][ib]; + a1 = a + sizeof(ENTROPY_CONTEXT_PLANES) / sizeof(ENTROPY_CONTEXT); + l1 = l + sizeof(ENTROPY_CONTEXT_PLANES) / sizeof(ENTROPY_CONTEXT); + a2 = l2 = a3 = l3 = NULL; + } else { + a = (ENTROPY_CONTEXT *)xd->above_context + vp9_block2above[tx_size][ib]; + l = (ENTROPY_CONTEXT *)xd->left_context + vp9_block2left[tx_size][ib]; + a1 = l1 = a2 = l2 = a3 = l3 = NULL; + } switch (tx_size) { default: case TX_4X4: + a_ec = a[0]; + l_ec = l[0]; counts = cpi->coef_counts_4x4; probs = cpi->common.fc.coef_probs_4x4; break; @@ -646,12 +960,17 @@ static INLINE void stuff_b(VP9_COMP *cpi, probs = cpi->common.fc.coef_probs_16x16; break; case TX_32X32: - a_ec = a[0] + a[1] + a[2] + a[3] + - a1[0] + a1[1] + a1[2] + a1[3]; - l_ec = l[0] + l[1] + l[2] + l[3] + - l1[0] + l1[1] + l1[2] + l1[3]; - a_ec = a_ec != 0; - l_ec = l_ec != 0; + if (type != PLANE_TYPE_UV) { + a_ec = (a[0] + a[1] + a[2] + a[3] + + a1[0] + a1[1] + a1[2] + a1[3]) != 0; + l_ec = (l[0] + l[1] + l[2] + l[3] + + l1[0] + l1[1] + l1[2] + l1[3]) != 0; + } else { + a_ec = (a[0] + a[1] + a1[0] + a1[1] + + a2[0] + a2[1] + a3[0] + a3[1]) != 0; + l_ec = (l[0] + l[1] + l1[0] + l1[1] + + l2[0] + l2[1] + l3[0] + l3[1]) != 0; + } counts = cpi->coef_counts_32x32; probs = cpi->common.fc.coef_probs_32x32; break; @@ -678,10 +997,17 @@ static INLINE void stuff_b(VP9_COMP *cpi, l1[0] = l1[1] = l[1] = l_ec; } } else if (tx_size == TX_32X32) { - a[1] = a[2] = a[3] = a_ec; - l[1] 
= l[2] = l[3] = l_ec; - a1[0] = a1[1] = a1[2] = a1[3] = a_ec; - l1[0] = l1[1] = l1[2] = l1[3] = l_ec; + if (type != PLANE_TYPE_UV) { + a[1] = a[2] = a[3] = a_ec; + l[1] = l[2] = l[3] = l_ec; + a1[0] = a1[1] = a1[2] = a1[3] = a_ec; + l1[0] = l1[1] = l1[2] = l1[3] = l_ec; + } else { + a[1] = a1[0] = a1[1] = a_ec; + l[1] = l1[0] = l1[1] = l_ec; + a2[0] = a2[1] = a3[0] = a3[1] = a_ec; + l2[0] = l2[1] = l3[0] = l3[1] = l_ec; + } } if (!dry_run) { @@ -751,27 +1077,76 @@ void vp9_stuff_mb(VP9_COMP *cpi, MACROBLOCKD *xd, TOKENEXTRA **t, int dry_run) { } } -static void stuff_sb_32x32(VP9_COMP *cpi, MACROBLOCKD *xd, - TOKENEXTRA **t, int dry_run) { - int b; - - stuff_b(cpi, xd, 0, t, PLANE_TYPE_Y_WITH_DC, TX_32X32, dry_run); - for (b = 16; b < 24; b += 4) { - stuff_b(cpi, xd, b, t, PLANE_TYPE_UV, TX_16X16, dry_run); - } -} - void vp9_stuff_sb(VP9_COMP *cpi, MACROBLOCKD *xd, TOKENEXTRA **t, int dry_run) { TOKENEXTRA * const t_backup = *t; + int b; - stuff_sb_32x32(cpi, xd, t, dry_run); + switch (xd->mode_info_context->mbmi.txfm_size) { + case TX_32X32: + stuff_b(cpi, xd, 0, t, PLANE_TYPE_Y_WITH_DC, TX_32X32, dry_run); + for (b = 64; b < 96; b += 16) + stuff_b(cpi, xd, b, t, PLANE_TYPE_UV, TX_16X16, dry_run); + break; + case TX_16X16: + for (b = 0; b < 64; b += 16) + stuff_b(cpi, xd, b, t, PLANE_TYPE_Y_WITH_DC, TX_16X16, dry_run); + for (b = 64; b < 96; b += 16) + stuff_b(cpi, xd, b, t, PLANE_TYPE_UV, TX_16X16, dry_run); + break; + case TX_8X8: + for (b = 0; b < 64; b += 4) + stuff_b(cpi, xd, b, t, PLANE_TYPE_Y_WITH_DC, TX_8X8, dry_run); + for (b = 64; b < 96; b += 4) + stuff_b(cpi, xd, b, t, PLANE_TYPE_UV, TX_8X8, dry_run); + break; + case TX_4X4: + for (b = 0; b < 64; b++) + stuff_b(cpi, xd, b, t, PLANE_TYPE_Y_WITH_DC, TX_4X4, dry_run); + for (b = 64; b < 96; b++) + stuff_b(cpi, xd, b, t, PLANE_TYPE_UV, TX_4X4, dry_run); + break; + default: assert(0); + } if (dry_run) { *t = t_backup; } } -void vp9_fix_contexts_sb(MACROBLOCKD *xd) { - vpx_memset(xd->above_context, 0,
sizeof(ENTROPY_CONTEXT_PLANES) * 2); - vpx_memset(xd->left_context, 0, sizeof(ENTROPY_CONTEXT_PLANES) * 2); +void vp9_stuff_sb64(VP9_COMP *cpi, MACROBLOCKD *xd, + TOKENEXTRA **t, int dry_run) { + TOKENEXTRA * const t_backup = *t; + int b; + + switch (xd->mode_info_context->mbmi.txfm_size) { + case TX_32X32: + for (b = 0; b < 256; b += 64) + stuff_b(cpi, xd, b, t, PLANE_TYPE_Y_WITH_DC, TX_32X32, dry_run); + for (b = 256; b < 384; b += 64) + stuff_b(cpi, xd, b, t, PLANE_TYPE_UV, TX_32X32, dry_run); + break; + case TX_16X16: + for (b = 0; b < 256; b += 16) + stuff_b(cpi, xd, b, t, PLANE_TYPE_Y_WITH_DC, TX_16X16, dry_run); + for (b = 256; b < 384; b += 16) + stuff_b(cpi, xd, b, t, PLANE_TYPE_UV, TX_16X16, dry_run); + break; + case TX_8X8: + for (b = 0; b < 256; b += 4) + stuff_b(cpi, xd, b, t, PLANE_TYPE_Y_WITH_DC, TX_8X8, dry_run); + for (b = 256; b < 384; b += 4) + stuff_b(cpi, xd, b, t, PLANE_TYPE_UV, TX_8X8, dry_run); + break; + case TX_4X4: + for (b = 0; b < 256; b++) + stuff_b(cpi, xd, b, t, PLANE_TYPE_Y_WITH_DC, TX_4X4, dry_run); + for (b = 256; b < 384; b++) + stuff_b(cpi, xd, b, t, PLANE_TYPE_UV, TX_4X4, dry_run); + break; + default: assert(0); + } + + if (dry_run) { + *t = t_backup; + } } diff --git a/vp9/encoder/vp9_tokenize.h b/vp9/encoder/vp9_tokenize.h index 6ac19ba71..4d6fe6343 100644 --- a/vp9/encoder/vp9_tokenize.h +++ b/vp9/encoder/vp9_tokenize.h @@ -38,6 +38,7 @@ int vp9_mbuv_is_skippable_8x8(MACROBLOCKD *xd); int vp9_mby_is_skippable_16x16(MACROBLOCKD *xd); int vp9_sby_is_skippable_32x32(MACROBLOCKD *xd); int vp9_sbuv_is_skippable_16x16(MACROBLOCKD *xd); +int vp9_sb64uv_is_skippable_32x32(MACROBLOCKD *xd); struct VP9_COMP; @@ -45,13 +46,15 @@ void vp9_tokenize_mb(struct VP9_COMP *cpi, MACROBLOCKD *xd, TOKENEXTRA **t, int dry_run); void vp9_tokenize_sb(struct VP9_COMP *cpi, MACROBLOCKD *xd, TOKENEXTRA **t, int dry_run); +void vp9_tokenize_sb64(struct VP9_COMP *cpi, MACROBLOCKD *xd, + TOKENEXTRA **t, int dry_run); void vp9_stuff_mb(struct VP9_COMP 
*cpi, MACROBLOCKD *xd, TOKENEXTRA **t, int dry_run); void vp9_stuff_sb(struct VP9_COMP *cpi, MACROBLOCKD *xd, TOKENEXTRA **t, int dry_run); - -void vp9_fix_contexts_sb(MACROBLOCKD *xd); +void vp9_stuff_sb64(struct VP9_COMP *cpi, MACROBLOCKD *xd, + TOKENEXTRA **t, int dry_run); #ifdef ENTROPY_STATS void init_context_counters(); @@ -60,7 +63,7 @@ void print_context_counters(); extern vp9_coeff_accum context_counters_4x4[BLOCK_TYPES]; extern vp9_coeff_accum context_counters_8x8[BLOCK_TYPES]; extern vp9_coeff_accum context_counters_16x16[BLOCK_TYPES]; -extern vp9_coeff_accum context_counters_32x32[BLOCK_TYPES_32X32]; +extern vp9_coeff_accum context_counters_32x32[BLOCK_TYPES]; #endif extern const int *vp9_dct_value_cost_ptr;