From eb6ef2417f9a386ba8d91e934d6e8691b8312a98 Mon Sep 17 00:00:00 2001 From: Deb Mukherjee Date: Wed, 20 Feb 2013 10:16:24 -0800 Subject: [PATCH] Coding non-zero count rather than EOB for coeffs This patch revamps the entropy coding of coefficients to code first a non-zero count per coded block and correspondingly remove the EOB token from the token set. STATUS: Main encode/decode code achieving encode/decode sync - done. Forward and backward probability updates to the nzcs - done. Rd costing updates for nzcs - done. Note: The dynamic programming approach used in trellis quantization is not exactly compatible with nzcs. A suboptimal approach has been used instead where branch costs are updated to account for changes in the nzcs. TODO: Training the default probs/counts for nzcs Change-Id: I951bc1e22f47885077a7453a09b0493daa77883d --- configure | 1 + vp9/common/vp9_blockd.h | 27 + vp9/common/vp9_coefupdateprobs.h | 7 + vp9/common/vp9_default_coef_probs.h | 127 +++ vp9/common/vp9_entropy.c | 1346 ++++++++++++++++++++++++++- vp9/common/vp9_entropy.h | 76 ++ vp9/common/vp9_onyxc_int.h | 58 ++ vp9/decoder/vp9_decodemv.c | 311 ++++++- vp9/decoder/vp9_decodframe.c | 126 ++- vp9/decoder/vp9_detokenize.c | 32 +- vp9/encoder/vp9_bitstream.c | 475 +++++++++- vp9/encoder/vp9_block.h | 6 + vp9/encoder/vp9_encodeframe.c | 263 ++++-- vp9/encoder/vp9_encodeintra.c | 16 +- vp9/encoder/vp9_encodeintra.h | 4 +- vp9/encoder/vp9_encodemb.c | 206 ++-- vp9/encoder/vp9_encodemb.h | 46 +- vp9/encoder/vp9_onyx_if.c | 12 +- vp9/encoder/vp9_onyx_int.h | 29 + vp9/encoder/vp9_quantize.c | 115 ++- vp9/encoder/vp9_ratectrl.c | 12 + vp9/encoder/vp9_rdopt.c | 361 ++++--- vp9/encoder/vp9_segmentation.c | 6 +- vp9/encoder/vp9_tokenize.c | 51 +- vp9/encoder/vp9_treewriter.c | 1 + 25 files changed, 3330 insertions(+), 384 deletions(-) diff --git a/configure b/configure index f55f79863..5c8dc8e84 100755 --- a/configure +++ b/configure @@ -245,6 +245,7 @@ EXPERIMENT_LIST=" comp_interintra_pred enable_6tap 
abovesprefmv + code_nonzerocount " CONFIG_LIST=" external_build diff --git a/vp9/common/vp9_blockd.h b/vp9/common/vp9_blockd.h index b46dd0568..9f978ce5e 100644 --- a/vp9/common/vp9_blockd.h +++ b/vp9/common/vp9_blockd.h @@ -250,6 +250,9 @@ typedef struct { INTERPOLATIONFILTERTYPE interp_filter; BLOCK_SIZE_TYPE sb_type; +#if CONFIG_CODE_NONZEROCOUNT + uint16_t nzcs[256+64*2]; +#endif } MB_MODE_INFO; typedef struct { @@ -295,6 +298,9 @@ typedef struct macroblockd { DECLARE_ALIGNED(16, int16_t, qcoeff[64*64+32*32*2]); DECLARE_ALIGNED(16, int16_t, dqcoeff[64*64+32*32*2]); DECLARE_ALIGNED(16, uint16_t, eobs[256+64*2]); +#if CONFIG_CODE_NONZEROCOUNT + DECLARE_ALIGNED(16, uint16_t, nzcs[256+64*2]); +#endif /* 16 Y blocks, 4 U, 4 V, each with 16 entries. */ BLOCKD block[24]; @@ -592,4 +598,25 @@ static void update_blockd_bmi(MACROBLOCKD *xd) { } } +static TX_SIZE get_uv_tx_size(const MACROBLOCKD *xd) { + TX_SIZE tx_size_uv; + if (xd->mode_info_context->mbmi.sb_type == BLOCK_SIZE_SB64X64) { + tx_size_uv = xd->mode_info_context->mbmi.txfm_size; + } else if (xd->mode_info_context->mbmi.sb_type == BLOCK_SIZE_SB32X32) { + if (xd->mode_info_context->mbmi.txfm_size == TX_32X32) + tx_size_uv = TX_16X16; + else + tx_size_uv = xd->mode_info_context->mbmi.txfm_size; + } else { + if (xd->mode_info_context->mbmi.txfm_size == TX_16X16) + tx_size_uv = TX_8X8; + else if (xd->mode_info_context->mbmi.txfm_size == TX_8X8 && + (xd->mode_info_context->mbmi.mode == I8X8_PRED || + xd->mode_info_context->mbmi.mode == SPLITMV)) + tx_size_uv = TX_4X4; + else + tx_size_uv = xd->mode_info_context->mbmi.txfm_size; + } + return tx_size_uv; +} #endif // VP9_COMMON_VP9_BLOCKD_H_ diff --git a/vp9/common/vp9_coefupdateprobs.h b/vp9/common/vp9_coefupdateprobs.h index ab8d8d940..ff018e698 100644 --- a/vp9/common/vp9_coefupdateprobs.h +++ b/vp9/common/vp9_coefupdateprobs.h @@ -17,4 +17,11 @@ #define COEF_UPDATE_PROB_8X8 252 #define COEF_UPDATE_PROB_16X16 252 +#if CONFIG_CODE_NONZEROCOUNT +#define 
NZC_UPDATE_PROB_4X4 252 +#define NZC_UPDATE_PROB_8X8 252 +#define NZC_UPDATE_PROB_16X16 252 +#define NZC_UPDATE_PROB_32X32 252 +#endif + #endif // VP9_COMMON_VP9_COEFUPDATEPROBS_H__ diff --git a/vp9/common/vp9_default_coef_probs.h b/vp9/common/vp9_default_coef_probs.h index 204e65af6..8fb802031 100644 --- a/vp9/common/vp9_default_coef_probs.h +++ b/vp9/common/vp9_default_coef_probs.h @@ -695,3 +695,130 @@ static const vp9_coeff_probs default_coef_probs_32x32[BLOCK_TYPES] = { } } }; + +#if CONFIG_CODE_NONZEROCOUNT +// TODO(debargha): Replace with probabilities once the stats stabilize +static const unsigned int default_nzc4x4_counts[MAX_NZC_CONTEXTS] + [REF_TYPES] + [BLOCK_TYPES] + [NZC4X4_TOKENS] = { + { + { + { 967652, 29023, 15039, 6952, 1568, 116 }, + { 789116, 22938, 4522, 1935, 520, 47 } + }, { + { 967652, 29023, 15039, 6952, 1568, 116 }, + { 789116, 22938, 4522, 1935, 520, 47 } + }, + }, { + { + { 124684, 37167, 15270, 8483, 1777, 102 }, + { 10405, 12395, 3401, 3574, 2461, 771 } + }, { + { 124684, 37167, 15270, 8483, 1777, 102 }, + { 10405, 12395, 3401, 3574, 2461, 771 } + } + }, { + { + { 41100, 22976, 15627, 16137, 7982, 793 }, + { 4249, 3084, 2131, 4081, 6439, 1653 } + }, { + { 41100, 22976, 15627, 16137, 7982, 793 }, + { 4249, 3084, 2131, 4081, 6439, 1653 } + } + } +}; + +static const unsigned int default_nzc8x8_counts[MAX_NZC_CONTEXTS] + [REF_TYPES] + [BLOCK_TYPES] + [NZC8X8_TOKENS] = { + { + { + { 372988, 62777, 19440, 11812, 5145, 1917, 439, 10 }, + { 72052, 30468, 6973, 3250, 1500, 750, 375, 5 }, + }, { + { 372988, 62777, 19440, 11812, 5145, 1917, 439, 10 }, + { 72052, 30468, 6973, 3250, 1500, 750, 375, 5 }, + } + }, { + { + { 121533, 33527, 15655, 11920, 5723, 2009, 315, 7 }, + { 17772, 23120, 13127, 8115, 4000, 2000, 200, 6 }, + }, { + { 121533, 33527, 15655, 11920, 5723, 2009, 315, 7 }, + { 17772, 23120, 13127, 8115, 4000, 2000, 200, 6 }, + } + }, { + { + { 29408, 11758, 8023, 10123, 6705, 2468, 369, 17 }, + { 6612, 13874, 13329, 13022, 6500, 3250, 
300, 12 }, + }, { + { 29408, 11758, 8023, 10123, 6705, 2468, 369, 17 }, + { 6612, 13874, 13329, 13022, 6500, 3250, 300, 12 }, + } + } +}; + +static const unsigned int default_nzc16x16_counts[MAX_NZC_CONTEXTS] + [REF_TYPES] + [BLOCK_TYPES] + [NZC16X16_TOKENS] = { + { + { + { 372988, 62777, 19440, 11812, 5145, 1917, 439, 10, 5, 2 }, + { 72052, 30468, 6973, 3250, 1500, 750, 375, 50, 8, 1 }, + }, { + { 372988, 62777, 19440, 11812, 5145, 1917, 439, 10, 5, 2 }, + { 72052, 30468, 6973, 3250, 1500, 750, 375, 50, 8, 1 }, + } + }, { + { + { 121533, 33527, 15655, 11920, 5723, 2009, 315, 7, 4, 2 }, + { 17772, 23120, 13127, 8115, 4000, 2000, 200, 6, 4, 2 }, + }, { + { 121533, 33527, 15655, 11920, 5723, 2009, 315, 7, 4, 2 }, + { 17772, 23120, 13127, 8115, 4000, 2000, 200, 6, 4, 2 }, + } + }, { + { + { 29408, 11758, 8023, 10123, 6705, 2468, 369, 17, 10, 5 }, + { 6612, 13874, 13329, 13022, 6500, 3250, 300, 12, 6, 3 }, + }, { + { 29408, 11758, 8023, 10123, 6705, 2468, 369, 17, 10, 5 }, + { 6612, 13874, 13329, 13022, 6500, 3250, 300, 12, 6, 3 }, + } + } +}; + +static const unsigned int default_nzc32x32_counts[MAX_NZC_CONTEXTS] + [REF_TYPES] + [BLOCK_TYPES] + [NZC32X32_TOKENS] = { + { + { + { 372988, 62777, 19440, 11812, 5145, 1917, 439, 10, 5, 2, 1, 0 }, + { 72052, 30468, 6973, 3250, 1500, 750, 375, 50, 8, 1 }, + }, { + { 372988, 62777, 19440, 11812, 5145, 1917, 439, 10, 5, 2, 1, 0 }, + { 72052, 30468, 6973, 3250, 1500, 750, 375, 50, 8, 1 }, + } + }, { + { + { 121533, 33527, 15655, 11920, 5723, 2009, 315, 7, 4, 2, 1, 0 }, + { 17772, 23120, 13127, 8115, 4000, 2000, 200, 6, 4, 2 }, + }, { + { 121533, 33527, 15655, 11920, 5723, 2009, 315, 7, 4, 2, 1, 0 }, + { 17772, 23120, 13127, 8115, 4000, 2000, 200, 6, 4, 2 }, + } + }, { + { + { 29408, 11758, 8023, 10123, 6705, 2468, 369, 17, 10, 5, 2, 1 }, + { 6612, 13874, 13329, 13022, 6500, 3250, 300, 12, 6, 3 }, + }, { + { 29408, 11758, 8023, 10123, 6705, 2468, 369, 17, 10, 5, 2, 1 }, + { 6612, 13874, 13329, 13022, 6500, 3250, 300, 12, 6, 3 }, + 
} + } +}; +#endif diff --git a/vp9/common/vp9_entropy.c b/vp9/common/vp9_entropy.c index 1e3a7e17e..c4908e29a 100644 --- a/vp9/common/vp9_entropy.c +++ b/vp9/common/vp9_entropy.c @@ -1,4 +1,4 @@ - /* +/* * Copyright (c) 2010 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license @@ -186,6 +186,92 @@ static const vp9_prob Pcat6[] = { 254, 254, 254, 252, 249, 243, 230, 196, 177, 153, 140, 133, 130, 129 }; +#if CONFIG_CODE_NONZEROCOUNT +const vp9_tree_index vp9_nzc4x4_tree[2 * NZC4X4_NODES] = { + -NZC_0, 2, + 4, 6, + -NZC_1, -NZC_2, + -NZC_3TO4, 8, + -NZC_5TO8, -NZC_9TO16, +}; +struct vp9_token_struct vp9_nzc4x4_encodings[NZC4X4_TOKENS]; + +const vp9_tree_index vp9_nzc8x8_tree[2 * NZC8X8_NODES] = { + -NZC_0, 2, + 4, 6, + -NZC_1, -NZC_2, + 8, 10, + -NZC_3TO4, -NZC_5TO8, + -NZC_9TO16, 12, + -NZC_17TO32, -NZC_33TO64, +}; +struct vp9_token_struct vp9_nzc8x8_encodings[NZC8X8_TOKENS]; + +const vp9_tree_index vp9_nzc16x16_tree[2 * NZC16X16_NODES] = { + -NZC_0, 2, + 4, 6, + -NZC_1, -NZC_2, + 8, 10, + -NZC_3TO4, -NZC_5TO8, + 12, 14, + -NZC_9TO16, -NZC_17TO32, + -NZC_33TO64, 16, + -NZC_65TO128, -NZC_129TO256, +}; +struct vp9_token_struct vp9_nzc16x16_encodings[NZC16X16_TOKENS]; + +const vp9_tree_index vp9_nzc32x32_tree[2 * NZC32X32_NODES] = { + -NZC_0, 2, + 4, 6, + -NZC_1, -NZC_2, + 8, 10, + -NZC_3TO4, -NZC_5TO8, + 12, 14, + -NZC_9TO16, -NZC_17TO32, + 16, 18, + -NZC_33TO64, -NZC_65TO128, + -NZC_129TO256, 20, + -NZC_257TO512, -NZC_513TO1024, +}; +struct vp9_token_struct vp9_nzc32x32_encodings[NZC32X32_TOKENS]; + +const vp9_prob Pcat_nzc[MAX_NZC_CONTEXTS] + [NZC_TOKENS_EXTRA][NZC_BITS_EXTRA] = { { + // Bit probabilities are in least to most significance order + {176, 0, 0, 0, 0, 0, 0, 0, 0}, // 3 - 4 + {164, 192, 0, 0, 0, 0, 0, 0, 0}, // 5 - 8 + {154, 184, 208, 0, 0, 0, 0, 0, 0}, // 9 - 16 + {144, 176, 200, 216, 0, 0, 0, 0, 0}, // 17 - 32 + {140, 172, 192, 208, 224, 0, 0, 0, 0}, // 33 - 64 + {136, 168, 188, 200, 220, 232, 0, 
0, 0}, // 65 - 128 + {132, 164, 184, 196, 216, 228, 240, 0, 0}, // 129 - 256 + {130, 162, 178, 194, 212, 226, 240, 248, 0}, // 257 - 512 + {128, 160, 176, 192, 208, 224, 240, 248, 254}, // 513 - 1024 + }, { + {168, 0, 0, 0, 0, 0, 0, 0, 0}, // 3 - 4 + {152, 184, 0, 0, 0, 0, 0, 0, 0}, // 5 - 8 + {152, 184, 208, 0, 0, 0, 0, 0, 0}, // 9 - 16 + {144, 176, 200, 216, 0, 0, 0, 0, 0}, // 17 - 32 + {140, 172, 192, 208, 224, 0, 0, 0, 0}, // 33 - 64 + {136, 168, 188, 200, 220, 232, 0, 0, 0}, // 65 - 128 + {132, 164, 184, 196, 216, 228, 240, 0, 0}, // 129 - 256 + {130, 162, 178, 194, 212, 226, 240, 248, 0}, // 257 - 512 + {128, 160, 176, 192, 208, 224, 240, 248, 254}, // 513 - 1024 + }, { + {160, 0, 0, 0, 0, 0, 0, 0, 0}, // 3 - 4 + {152, 176, 0, 0, 0, 0, 0, 0, 0}, // 5 - 8 + {150, 184, 208, 0, 0, 0, 0, 0, 0}, // 9 - 16 + {144, 176, 200, 216, 0, 0, 0, 0, 0}, // 17 - 32 + {140, 172, 192, 208, 224, 0, 0, 0, 0}, // 33 - 64 + {136, 168, 188, 200, 220, 232, 0, 0, 0}, // 65 - 128 + {132, 164, 184, 196, 216, 228, 240, 0, 0}, // 129 - 256 + {130, 162, 178, 194, 212, 226, 240, 248, 0}, // 257 - 512 + {128, 160, 176, 192, 208, 224, 240, 248, 254}, // 513 - 1024 + }, +}; + +#endif // CONFIG_CODE_NONZEROCOUNT + static vp9_tree_index cat1[2], cat2[4], cat3[6], cat4[8], cat5[10], cat6[28]; static void init_bit_tree(vp9_tree_index *p, int n) { @@ -253,6 +339,9 @@ int vp9_get_coef_context(int * recent_energy, int token) { }; void vp9_default_coef_probs(VP9_COMMON *pc) { +#if CONFIG_CODE_NONZEROCOUNT + int h, g; +#endif vpx_memcpy(pc->fc.coef_probs_4x4, default_coef_probs_4x4, sizeof(pc->fc.coef_probs_4x4)); vpx_memcpy(pc->fc.coef_probs_8x8, default_coef_probs_8x8, @@ -261,13 +350,1128 @@ void vp9_default_coef_probs(VP9_COMMON *pc) { sizeof(pc->fc.coef_probs_16x16)); vpx_memcpy(pc->fc.coef_probs_32x32, default_coef_probs_32x32, sizeof(pc->fc.coef_probs_32x32)); +#if CONFIG_CODE_NONZEROCOUNT + for (h = 0; h < MAX_NZC_CONTEXTS; ++h) { + for (g = 0; g < REF_TYPES; ++g) { + int i; + unsigned int 
branch_ct4x4[NZC4X4_NODES][2]; + unsigned int branch_ct8x8[NZC8X8_NODES][2]; + unsigned int branch_ct16x16[NZC16X16_NODES][2]; + unsigned int branch_ct32x32[NZC32X32_NODES][2]; + for (i = 0; i < BLOCK_TYPES; ++i) { + vp9_tree_probs_from_distribution( + NZC4X4_TOKENS, vp9_nzc4x4_encodings, vp9_nzc4x4_tree, + pc->fc.nzc_probs_4x4[h][g][i], branch_ct4x4, + default_nzc4x4_counts[h][g][i]); + } + for (i = 0; i < BLOCK_TYPES; ++i) { + vp9_tree_probs_from_distribution( + NZC8X8_TOKENS, vp9_nzc8x8_encodings, vp9_nzc8x8_tree, + pc->fc.nzc_probs_8x8[h][g][i], branch_ct8x8, + default_nzc8x8_counts[h][g][i]); + } + for (i = 0; i < BLOCK_TYPES; ++i) { + vp9_tree_probs_from_distribution( + NZC16X16_TOKENS, vp9_nzc16x16_encodings, vp9_nzc16x16_tree, + pc->fc.nzc_probs_16x16[h][g][i], branch_ct16x16, + default_nzc16x16_counts[h][g][i]); + } + for (i = 0; i < BLOCK_TYPES; ++i) { + vp9_tree_probs_from_distribution( + NZC32X32_TOKENS, vp9_nzc32x32_encodings, vp9_nzc32x32_tree, + pc->fc.nzc_probs_32x32[h][g][i], branch_ct32x32, + default_nzc32x32_counts[h][g][i]); + } + } + } +#endif // CONFIG_CODE_NONZEROCOUNT } void vp9_coef_tree_initialize() { init_bit_trees(); vp9_tokens_from_tree(vp9_coef_encodings, vp9_coef_tree); +#if CONFIG_CODE_NONZEROCOUNT + vp9_tokens_from_tree(vp9_nzc4x4_encodings, vp9_nzc4x4_tree); + vp9_tokens_from_tree(vp9_nzc8x8_encodings, vp9_nzc8x8_tree); + vp9_tokens_from_tree(vp9_nzc16x16_encodings, vp9_nzc16x16_tree); + vp9_tokens_from_tree(vp9_nzc32x32_encodings, vp9_nzc32x32_tree); +#endif } +#if CONFIG_CODE_NONZEROCOUNT + +#define mb_in_cur_tile(cm, mb_row, mb_col) \ + ((mb_col) >= (cm)->cur_tile_mb_col_start && \ + (mb_col) <= (cm)->cur_tile_mb_col_end && \ + (mb_row) >= 0) + +#define choose_nzc_context(nzc_exp, t2, t1) \ + ((nzc_exp) >= ((t2) << 6) ? 2 : (nzc_exp) >= ((t1) << 6) ? 
1 : 0) + +#define NZC_T2_32X32 32 +#define NZC_T1_32X32 8 +#define NZC_T2_16X16 16 +#define NZC_T1_16X16 4 +#define NZC_T2_8X8 8 +#define NZC_T1_8X8 2 +#define NZC_T2_4X4 4 +#define NZC_T1_4X4 1 + +// Transforms a mb16 block index to a sb64 block index +static inline int mb16_to_sb64_index(int mb_row, int mb_col, int block) { + int r = (mb_row & 3); + int c = (mb_col & 3); + int b; + if (block < 16) { // Y + int ib = block >> 2; + int jb = block & 3; + ib += r * 4; + jb += c * 4; + b = ib * 16 + jb; + assert(b < 256); + return b; + } else { // UV + int base = block - (block & 3); + int ib = (block - base) >> 1; + int jb = (block - base) & 1; + ib += r * 2; + jb += c * 2; + b = base * 16 + ib * 8 + jb; + assert(b >= 256 && b < 384); + return b; + } +} + +// Transforms a mb16 block index to a sb32 block index +static inline int mb16_to_sb32_index(int mb_row, int mb_col, int block) { + int r = (mb_row & 1); + int c = (mb_col & 1); + int b; + if (block < 16) { // Y + int ib = block >> 2; + int jb = block & 3; + ib += r * 4; + jb += c * 4; + b = ib * 8 + jb; + assert(b < 64); + return b; + } else { // UV + int base = block - (block & 3); + int ib = (block - base) >> 1; + int jb = (block - base) & 1; + ib += r * 2; + jb += c * 2; + b = base * 4 + ib * 4 + jb; + assert(b >= 64 && b < 96); + return b; + } +} + +static inline int block_to_txfm_index(int block, TX_SIZE tx_size, int s) { + // s is the log of the number of 4x4 blocks in each row/col of larger block + int b, ib, jb, nb; + ib = block >> s; + jb = block - (ib << s); + ib >>= tx_size; + jb >>= tx_size; + nb = 1 << (s - tx_size); + b = (ib * nb + jb) << (2 * tx_size); + return b; +} + +/* BEGIN - Helper functions to get the y nzcs */ +static unsigned int get_nzc_4x4_y_sb64(MB_MODE_INFO *mi, int block) { + int b; + assert(block < 256); + b = block_to_txfm_index(block, mi->txfm_size, 4); + assert(b < 256); + return mi->nzcs[b] << (6 - 2 * mi->txfm_size); +} + +static unsigned int get_nzc_4x4_y_sb32(MB_MODE_INFO *mi, 
int block) { + int b; + assert(block < 64); + b = block_to_txfm_index(block, mi->txfm_size, 3); + assert(b < 64); + return mi->nzcs[b] << (6 - 2 * mi->txfm_size); +} + +static unsigned int get_nzc_4x4_y_mb16(MB_MODE_INFO *mi, int block) { + int b; + assert(block < 16); + b = block_to_txfm_index(block, mi->txfm_size, 2); + assert(b < 16); + return mi->nzcs[b] << (6 - 2 * mi->txfm_size); +} +/* END - Helper functions to get the y nzcs */ + +/* Function to get y nzc where block index is in mb16 terms */ +static unsigned int get_nzc_4x4_y(VP9_COMMON *cm, MODE_INFO *m, + int mb_row, int mb_col, int block) { + // NOTE: All values returned are at 64 times the true value at 4x4 scale + MB_MODE_INFO *const mi = &m->mbmi; + const int mis = cm->mode_info_stride; + if (mi->mb_skip_coeff || !mb_in_cur_tile(cm, mb_row, mb_col)) + return 0; + if (mi->sb_type == BLOCK_SIZE_SB64X64) { + int r = mb_row & 3; + int c = mb_col & 3; + m -= c + r * mis; + if (m->mbmi.mb_skip_coeff || !mb_in_cur_tile(cm, mb_row - r, mb_col - c)) + return 0; + else + return get_nzc_4x4_y_sb64( + &m->mbmi, mb16_to_sb64_index(mb_row, mb_col, block)); + } else if (mi->sb_type == BLOCK_SIZE_SB32X32) { + int r = mb_row & 1; + int c = mb_col & 1; + m -= c + r * mis; + if (m->mbmi.mb_skip_coeff || !mb_in_cur_tile(cm, mb_row - r, mb_col - c)) + return 0; + else + return get_nzc_4x4_y_sb32( + &m->mbmi, mb16_to_sb32_index(mb_row, mb_col, block)); + } else { + if (m->mbmi.mb_skip_coeff || !mb_in_cur_tile(cm, mb_row, mb_col)) + return 0; + return get_nzc_4x4_y_mb16(mi, block); + } +} + +/* BEGIN - Helper functions to get the uv nzcs */ +static unsigned int get_nzc_4x4_uv_sb64(MB_MODE_INFO *mi, int block) { + int b; + int base, uvtxfm_size; + assert(block >= 256 && block < 384); + uvtxfm_size = mi->txfm_size; + base = 256 + (block & 64); + block -= base; + b = base + block_to_txfm_index(block, uvtxfm_size, 3); + assert(b >= 256 && b < 384); + return mi->nzcs[b] << (6 - 2 * uvtxfm_size); +} + +static unsigned int 
get_nzc_4x4_uv_sb32(MB_MODE_INFO *mi, int block) { + int b; + int base, uvtxfm_size; + assert(block >= 64 && block < 96); + if (mi->txfm_size == TX_32X32) + uvtxfm_size = TX_16X16; + else + uvtxfm_size = mi->txfm_size; + base = 64 + (block & 16); + block -= base; + b = base + block_to_txfm_index(block, uvtxfm_size, 2); + assert(b >= 64 && b < 96); + return mi->nzcs[b] << (6 - 2 * uvtxfm_size); +} + +static unsigned int get_nzc_4x4_uv_mb16(MB_MODE_INFO *mi, int block) { + int b; + int base, uvtxfm_size; + assert(block >= 16 && block < 24); + if (mi->txfm_size == TX_8X8 && + (mi->mode == SPLITMV || mi->mode == I8X8_PRED)) + uvtxfm_size = TX_4X4; + else if (mi->txfm_size == TX_16X16) + uvtxfm_size = TX_8X8; + else + uvtxfm_size = mi->txfm_size; + base = 16 + (block & 4); + block -= base; + b = base + block_to_txfm_index(block, uvtxfm_size, 1); + assert(b >= 16 && b < 24); + return mi->nzcs[b] << (6 - 2 * uvtxfm_size); +} +/* END - Helper functions to get the uv nzcs */ + +/* Function to get uv nzc where block index is in mb16 terms */ +static unsigned int get_nzc_4x4_uv(VP9_COMMON *cm, MODE_INFO *m, + int mb_row, int mb_col, int block) { + // NOTE: All values returned are at 64 times the true value at 4x4 scale + MB_MODE_INFO *const mi = &m->mbmi; + const int mis = cm->mode_info_stride; + if (mi->mb_skip_coeff || !mb_in_cur_tile(cm, mb_row, mb_col)) + return 0; + if (mi->sb_type == BLOCK_SIZE_SB64X64) { + int r = mb_row & 3; + int c = mb_col & 3; + m -= c + r * mis; + if (m->mbmi.mb_skip_coeff || !mb_in_cur_tile(cm, mb_row - r, mb_col - c)) + return 0; + else + return get_nzc_4x4_uv_sb64( + &m->mbmi, mb16_to_sb64_index(mb_row, mb_col, block)); + } else if (mi->sb_type == BLOCK_SIZE_SB32X32) { + int r = mb_row & 1; + int c = mb_col & 1; + m -= c + r * mis; + if (m->mbmi.mb_skip_coeff || !mb_in_cur_tile(cm, mb_row - r, mb_col - c)) + return 0; + else + return get_nzc_4x4_uv_sb32( + &m->mbmi, mb16_to_sb32_index(mb_row, mb_col, block)); + } else { + return 
get_nzc_4x4_uv_mb16(mi, block); + } +} + +int vp9_get_nzc_context_y_sb64(VP9_COMMON *cm, MODE_INFO *cur, + int mb_row, int mb_col, int block) { + // returns an index in [0, MAX_NZC_CONTEXTS - 1] to reflect how busy + // neighboring blocks are + int mis = cm->mode_info_stride; + int nzc_exp = 0; + TX_SIZE txfm_size = cur->mbmi.txfm_size; + assert(block < 256); + switch (txfm_size) { + case TX_32X32: + assert((block & 63) == 0); + if (block < 128) { + int o = (block >> 6) * 2; + nzc_exp = + get_nzc_4x4_y(cm, cur - mis + o, mb_row - 1, mb_col + o, 12) + + get_nzc_4x4_y(cm, cur - mis + o, mb_row - 1, mb_col + o, 13) + + get_nzc_4x4_y(cm, cur - mis + o, mb_row - 1, mb_col + o, 14) + + get_nzc_4x4_y(cm, cur - mis + o, mb_row - 1, mb_col + o, 15) + + get_nzc_4x4_y(cm, cur - mis + o + 1, + mb_row - 1, mb_col + o + 1, 12) + + get_nzc_4x4_y(cm, cur - mis + o + 1, + mb_row - 1, mb_col + o + 1, 13) + + get_nzc_4x4_y(cm, cur - mis + o + 1, + mb_row - 1, mb_col + o + 1, 14) + + get_nzc_4x4_y(cm, cur - mis + o + 1, + mb_row - 1, mb_col + o + 1, 15); + } else { + nzc_exp = cur->mbmi.nzcs[block - 128] << 3; + } + if ((block & 127) == 0) { + int o = (block >> 7) * 2; + nzc_exp += + get_nzc_4x4_y(cm, cur - 1 + o * mis, mb_row + o, mb_col - 1, 3) + + get_nzc_4x4_y(cm, cur - 1 + o * mis, mb_row + o, mb_col - 1, 7) + + get_nzc_4x4_y(cm, cur - 1 + o * mis, mb_row + o, mb_col - 1, 11) + + get_nzc_4x4_y(cm, cur - 1 + o * mis, mb_row + o, mb_col - 1, 15) + + get_nzc_4x4_y(cm, cur - 1 + o * mis + mis, + mb_row + o + 1, mb_col - 1, 3) + + get_nzc_4x4_y(cm, cur - 1 + o * mis + mis, + mb_row + o + 1, mb_col - 1, 7) + + get_nzc_4x4_y(cm, cur - 1 + o * mis + mis, + mb_row + o + 1, mb_col - 1, 11) + + get_nzc_4x4_y(cm, cur - 1 + o * mis + mis, + mb_row + o + 1, mb_col - 1, 15); + } else { + nzc_exp += cur->mbmi.nzcs[block - 64] << 3; + } + nzc_exp <<= 2; + // Note nzc_exp is 64 times the average value expected at 32x32 scale + return choose_nzc_context(nzc_exp, NZC_T2_32X32, NZC_T1_32X32); + 
break; + + case TX_16X16: + assert((block & 15) == 0); + if (block < 64) { + int o = block >> 4; + nzc_exp = + get_nzc_4x4_y(cm, cur - mis + o, mb_row - 1, mb_col + o, 12) + + get_nzc_4x4_y(cm, cur - mis + o, mb_row - 1, mb_col + o, 13) + + get_nzc_4x4_y(cm, cur - mis + o, mb_row - 1, mb_col + o, 14) + + get_nzc_4x4_y(cm, cur - mis + o, mb_row - 1, mb_col + o, 15); + } else { + nzc_exp = cur->mbmi.nzcs[block - 64] << 4; + } + if ((block & 63) == 0) { + int o = block >> 6; + nzc_exp += + get_nzc_4x4_y(cm, cur - 1 + o * mis, mb_row + o, mb_col - 1, 3) + + get_nzc_4x4_y(cm, cur - 1 + o * mis, mb_row + o, mb_col - 1, 7) + + get_nzc_4x4_y(cm, cur - 1 + o * mis, mb_row + o, mb_col - 1, 11) + + get_nzc_4x4_y(cm, cur - 1 + o * mis, mb_row + o, mb_col - 1, 15); + } else { + nzc_exp += cur->mbmi.nzcs[block - 16] << 4; + } + nzc_exp <<= 1; + // Note nzc_exp is 64 times the average value expected at 16x16 scale + return choose_nzc_context(nzc_exp, NZC_T2_16X16, NZC_T1_16X16); + break; + + case TX_8X8: + assert((block & 3) == 0); + if (block < 32) { + int o = block >> 3; + int p = ((block >> 2) & 1) ? 14 : 12; + nzc_exp = + get_nzc_4x4_y(cm, cur - mis + o, mb_row - 1, mb_col + o, p) + + get_nzc_4x4_y(cm, cur - mis + o, mb_row - 1, mb_col + o, p + 1); + } else { + nzc_exp = cur->mbmi.nzcs[block - 32] << 5; + } + if ((block & 31) == 0) { + int o = block >> 6; + int p = ((block >> 5) & 1) ? 
11 : 3; + nzc_exp += + get_nzc_4x4_y(cm, cur - 1 + o * mis, mb_row + o, mb_col - 1, p) + + get_nzc_4x4_y(cm, cur - 1 + o * mis, mb_row + o, mb_col - 1, p + 4); + } else { + nzc_exp += cur->mbmi.nzcs[block - 4] << 5; + } + // Note nzc_exp is 64 times the average value expected at 8x8 scale + return choose_nzc_context(nzc_exp, NZC_T2_8X8, NZC_T1_8X8); + break; + + case TX_4X4: + if (block < 16) { + int o = block >> 2; + int p = block & 3; + nzc_exp = get_nzc_4x4_y(cm, cur - mis + o, mb_row - 1, mb_col + o, + 12 + p); + } else { + nzc_exp = (cur->mbmi.nzcs[block - 16] << 6); + } + if ((block & 15) == 0) { + int o = block >> 6; + int p = (block >> 4) & 3; + nzc_exp += get_nzc_4x4_y(cm, cur - 1 + o * mis, mb_row + o, mb_col - 1, + 3 + 4 * p); + } else { + nzc_exp += (cur->mbmi.nzcs[block - 1] << 6); + } + nzc_exp >>= 1; + // Note nzc_exp is 64 times the average value expected at 4x4 scale + return choose_nzc_context(nzc_exp, NZC_T2_4X4, NZC_T1_4X4); + break; + + default: + return 0; + } +} + +int vp9_get_nzc_context_y_sb32(VP9_COMMON *cm, MODE_INFO *cur, + int mb_row, int mb_col, int block) { + // returns an index in [0, MAX_NZC_CONTEXTS - 1] to reflect how busy + // neighboring blocks are + int mis = cm->mode_info_stride; + int nzc_exp = 0; + TX_SIZE txfm_size = cur->mbmi.txfm_size; + assert(block < 64); + switch (txfm_size) { + case TX_32X32: + assert(block == 0); + nzc_exp = + (get_nzc_4x4_y(cm, cur - mis, mb_row - 1, mb_col, 12) + + get_nzc_4x4_y(cm, cur - mis, mb_row - 1, mb_col, 13) + + get_nzc_4x4_y(cm, cur - mis, mb_row - 1, mb_col, 14) + + get_nzc_4x4_y(cm, cur - mis, mb_row - 1, mb_col, 15) + + get_nzc_4x4_y(cm, cur - mis + 1, mb_row - 1, mb_col + 1, 12) + + get_nzc_4x4_y(cm, cur - mis + 1, mb_row - 1, mb_col + 1, 13) + + get_nzc_4x4_y(cm, cur - mis + 1, mb_row - 1, mb_col + 1, 14) + + get_nzc_4x4_y(cm, cur - mis + 1, mb_row - 1, mb_col + 1, 15) + + get_nzc_4x4_y(cm, cur - 1, mb_row, mb_col - 1, 3) + + get_nzc_4x4_y(cm, cur - 1, mb_row, mb_col - 1, 7) + + 
get_nzc_4x4_y(cm, cur - 1, mb_row, mb_col - 1, 11) + + get_nzc_4x4_y(cm, cur - 1, mb_row, mb_col - 1, 15) + + get_nzc_4x4_y(cm, cur - 1 + mis, mb_row + 1, mb_col - 1, 3) + + get_nzc_4x4_y(cm, cur - 1 + mis, mb_row + 1, mb_col - 1, 7) + + get_nzc_4x4_y(cm, cur - 1 + mis, mb_row + 1, mb_col - 1, 11) + + get_nzc_4x4_y(cm, cur - 1 + mis, mb_row + 1, mb_col - 1, 15)) << 2; + // Note nzc_exp is 64 times the average value expected at 32x32 scale + return choose_nzc_context(nzc_exp, NZC_T2_32X32, NZC_T1_32X32); + break; + + case TX_16X16: + assert((block & 15) == 0); + if (block < 32) { + int o = (block >> 4) & 1; + nzc_exp = + get_nzc_4x4_y(cm, cur - mis + o, mb_row - 1, mb_col + o, 12) + + get_nzc_4x4_y(cm, cur - mis + o, mb_row - 1, mb_col + o, 13) + + get_nzc_4x4_y(cm, cur - mis + o, mb_row - 1, mb_col + o, 14) + + get_nzc_4x4_y(cm, cur - mis + o, mb_row - 1, mb_col + o, 15); + } else { + nzc_exp = cur->mbmi.nzcs[block - 32] << 4; + } + if ((block & 31) == 0) { + int o = block >> 5; + nzc_exp += + get_nzc_4x4_y(cm, cur - 1 + o * mis, mb_row + o, mb_col - 1, 3) + + get_nzc_4x4_y(cm, cur - 1 + o * mis, mb_row + o, mb_col - 1, 7) + + get_nzc_4x4_y(cm, cur - 1 + o * mis, mb_row + o, mb_col - 1, 11) + + get_nzc_4x4_y(cm, cur - 1 + o * mis, mb_row + o, mb_col - 1, 15); + } else { + nzc_exp += cur->mbmi.nzcs[block - 16] << 4; + } + nzc_exp <<= 1; + // Note nzc_exp is 64 times the average value expected at 16x16 scale + return choose_nzc_context(nzc_exp, NZC_T2_16X16, NZC_T1_16X16); + break; + + case TX_8X8: + assert((block & 3) == 0); + if (block < 16) { + int o = block >> 3; + int p = ((block >> 2) & 1) ? 14 : 12; + nzc_exp = + get_nzc_4x4_y(cm, cur - mis + o, mb_row - 1, mb_col + o, p) + + get_nzc_4x4_y(cm, cur - mis + o, mb_row - 1, mb_col + o, p + 1); + } else { + nzc_exp = cur->mbmi.nzcs[block - 16] << 5; + } + if ((block & 15) == 0) { + int o = block >> 5; + int p = ((block >> 4) & 1) ? 
11 : 3; + nzc_exp += + get_nzc_4x4_y(cm, cur - 1 + o * mis, mb_row + o, mb_col - 1, p) + + get_nzc_4x4_y(cm, cur - 1 + o * mis, mb_row + o, mb_col - 1, p + 4); + } else { + nzc_exp += cur->mbmi.nzcs[block - 4] << 5; + } + // Note nzc_exp is 64 times the average value expected at 8x8 scale + return choose_nzc_context(nzc_exp, NZC_T2_8X8, NZC_T1_8X8); + break; + + case TX_4X4: + if (block < 8) { + int o = block >> 2; + int p = block & 3; + nzc_exp = get_nzc_4x4_y(cm, cur - mis + o, mb_row - 1, mb_col + o, + 12 + p); + } else { + nzc_exp = (cur->mbmi.nzcs[block - 8] << 6); + } + if ((block & 7) == 0) { + int o = block >> 5; + int p = (block >> 3) & 3; + nzc_exp += get_nzc_4x4_y(cm, cur - 1 + o * mis, mb_row + o, mb_col - 1, + 3 + 4 * p); + } else { + nzc_exp += (cur->mbmi.nzcs[block - 1] << 6); + } + nzc_exp >>= 1; + // Note nzc_exp is 64 times the average value expected at 4x4 scale + return choose_nzc_context(nzc_exp, NZC_T2_4X4, NZC_T1_4X4); + break; + + default: + return 0; + break; + } +} + +int vp9_get_nzc_context_y_mb16(VP9_COMMON *cm, MODE_INFO *cur, + int mb_row, int mb_col, int block) { + // returns an index in [0, MAX_NZC_CONTEXTS - 1] to reflect how busy + // neighboring blocks are + int mis = cm->mode_info_stride; + int nzc_exp = 0; + TX_SIZE txfm_size = cur->mbmi.txfm_size; + assert(block < 16); + switch (txfm_size) { + case TX_16X16: + assert(block == 0); + nzc_exp = + get_nzc_4x4_y(cm, cur - mis, mb_row - 1, mb_col, 12) + + get_nzc_4x4_y(cm, cur - mis, mb_row - 1, mb_col, 13) + + get_nzc_4x4_y(cm, cur - mis, mb_row - 1, mb_col, 14) + + get_nzc_4x4_y(cm, cur - mis, mb_row - 1, mb_col, 15) + + get_nzc_4x4_y(cm, cur - 1, mb_row, mb_col - 1, 3) + + get_nzc_4x4_y(cm, cur - 1, mb_row, mb_col - 1, 7) + + get_nzc_4x4_y(cm, cur - 1, mb_row, mb_col - 1, 11) + + get_nzc_4x4_y(cm, cur - 1, mb_row, mb_col - 1, 15); + nzc_exp <<= 1; + // Note nzc_exp is 64 times the average value expected at 16x16 scale + return choose_nzc_context(nzc_exp, NZC_T2_16X16, 
NZC_T1_16X16); + + case TX_8X8: + assert((block & 3) == 0); + if (block < 8) { + int p = ((block >> 2) & 1) ? 14 : 12; + nzc_exp = + get_nzc_4x4_y(cm, cur - mis, mb_row - 1, mb_col, p) + + get_nzc_4x4_y(cm, cur - mis, mb_row - 1, mb_col, p + 1); + } else { + nzc_exp = cur->mbmi.nzcs[block - 8] << 5; + } + if ((block & 7) == 0) { + int p = ((block >> 3) & 1) ? 11 : 3; + nzc_exp += + get_nzc_4x4_y(cm, cur - 1, mb_row, mb_col - 1, p) + + get_nzc_4x4_y(cm, cur - 1, mb_row, mb_col - 1, p + 4); + } else { + nzc_exp += cur->mbmi.nzcs[block - 4] << 5; + } + // Note nzc_exp is 64 times the average value expected at 8x8 scale + return choose_nzc_context(nzc_exp, NZC_T2_8X8, NZC_T1_8X8); + + case TX_4X4: + if (block < 4) { + int p = block & 3; + nzc_exp = get_nzc_4x4_y(cm, cur - mis, mb_row - 1, mb_col, + 12 + p); + } else { + nzc_exp = (cur->mbmi.nzcs[block - 4] << 6); + } + if ((block & 3) == 0) { + int p = (block >> 2) & 3; + nzc_exp += get_nzc_4x4_y(cm, cur - 1, mb_row, mb_col - 1, + 3 + 4 * p); + } else { + nzc_exp += (cur->mbmi.nzcs[block - 1] << 6); + } + nzc_exp >>= 1; + // Note nzc_exp is 64 times the average value expected at 4x4 scale + return choose_nzc_context(nzc_exp, NZC_T2_4X4, NZC_T1_4X4); + + default: + return 0; + break; + } +} + +int vp9_get_nzc_context_uv_sb64(VP9_COMMON *cm, MODE_INFO *cur, + int mb_row, int mb_col, int block) { + // returns an index in [0, MAX_NZC_CONTEXTS - 1] to reflect how busy + // neighboring blocks are + int mis = cm->mode_info_stride; + int nzc_exp = 0; + const int base = block - (block & 63); + const int boff = (block & 63); + const int base_mb16 = base >> 4; + TX_SIZE txfm_size = cur->mbmi.txfm_size; + TX_SIZE txfm_size_uv; + + assert(block >= 256 && block < 384); + txfm_size_uv = txfm_size; + + switch (txfm_size_uv) { + case TX_32X32: + assert(block == 256 || block == 320); + nzc_exp = + get_nzc_4x4_uv(cm, cur - mis, mb_row - 1, mb_col, + base_mb16 + 2) + + get_nzc_4x4_uv(cm, cur - mis, mb_row - 1, mb_col, + base_mb16 + 3) + + 
get_nzc_4x4_uv(cm, cur - mis + 1, mb_row - 1, mb_col + 1, + base_mb16 + 2) + + get_nzc_4x4_uv(cm, cur - mis + 1, mb_row - 1, mb_col + 1, + base_mb16 + 3) + + get_nzc_4x4_uv(cm, cur - mis + 2, mb_row - 1, mb_col + 2, + base_mb16 + 2) + + get_nzc_4x4_uv(cm, cur - mis + 2, mb_row - 1, mb_col + 2, + base_mb16 + 3) + + get_nzc_4x4_uv(cm, cur - mis + 3, mb_row - 1, mb_col + 3, + base_mb16 + 2) + + get_nzc_4x4_uv(cm, cur - mis + 3, mb_row - 1, mb_col + 3, + base_mb16 + 3) + + get_nzc_4x4_uv(cm, cur - 1, mb_row, mb_col - 1, + base_mb16 + 1) + + get_nzc_4x4_uv(cm, cur - 1, mb_row, mb_col - 1, + base_mb16 + 3) + + get_nzc_4x4_uv(cm, cur - 1 + mis, mb_row + 1, mb_col - 1, + base_mb16 + 1) + + get_nzc_4x4_uv(cm, cur - 1 + mis, mb_row + 1, mb_col - 1, + base_mb16 + 3) + + get_nzc_4x4_uv(cm, cur - 1 + 2 * mis, mb_row + 2, mb_col - 1, + base_mb16 + 1) + + get_nzc_4x4_uv(cm, cur - 1 + 2 * mis, mb_row + 2, mb_col - 1, + base_mb16 + 3) + + get_nzc_4x4_uv(cm, cur - 1 + 3 * mis, mb_row + 3, mb_col - 1, + base_mb16 + 1) + + get_nzc_4x4_uv(cm, cur - 1 + 3 * mis, mb_row + 3, mb_col - 1, + base_mb16 + 3); + nzc_exp <<= 2; + // Note nzc_exp is 64 times the average value expected at 32x32 scale + return choose_nzc_context(nzc_exp, NZC_T2_32X32, NZC_T1_32X32); + + case TX_16X16: + // uv txfm_size 16x16 + assert((block & 15) == 0); + if (boff < 32) { + int o = ((boff >> 4) & 1) * 2; /* a 16x16 uv transform spans 2 MB columns */ + nzc_exp = + get_nzc_4x4_uv(cm, cur - mis + o, mb_row - 1, mb_col + o, + base_mb16 + 2) + + get_nzc_4x4_uv(cm, cur - mis + o, mb_row - 1, mb_col + o, + base_mb16 + 3) + + get_nzc_4x4_uv(cm, cur - mis + o + 1, mb_row - 1, mb_col + o + 1, + base_mb16 + 2) + + get_nzc_4x4_uv(cm, cur - mis + o + 1, mb_row - 1, mb_col + o + 1, + base_mb16 + 3); + } else { + nzc_exp = cur->mbmi.nzcs[block - 32] << 4; + } + if ((boff & 31) == 0) { + int o = (boff >> 5) * 2; /* a 16x16 uv transform spans 2 MB rows */ + nzc_exp += + get_nzc_4x4_uv(cm, cur - 1 + o * mis, + mb_row + o, mb_col - 1, base_mb16 + 1) + + get_nzc_4x4_uv(cm, cur - 1 + o * mis, + mb_row + o, mb_col - 1, base_mb16 + 3) 
+ + get_nzc_4x4_uv(cm, cur - 1 + o * mis + mis, + mb_row + o + 1, mb_col - 1, base_mb16 + 1) + + get_nzc_4x4_uv(cm, cur - 1 + o * mis + mis, + mb_row + o + 1, mb_col - 1, base_mb16 + 3); + } else { + nzc_exp += cur->mbmi.nzcs[block - 16] << 4; + } + nzc_exp <<= 1; + // Note nzc_exp is 64 times the average value expected at 16x16 scale + return choose_nzc_context(nzc_exp, NZC_T2_16X16, NZC_T1_16X16); + + case TX_8X8: + assert((block & 3) == 0); + if (boff < 16) { + int o = boff >> 2; + nzc_exp = + get_nzc_4x4_uv(cm, cur - mis + o, mb_row - 1, mb_col + o, + base_mb16 + 2) + + get_nzc_4x4_uv(cm, cur - mis + o, mb_row - 1, mb_col + o, + base_mb16 + 3); + } else { + nzc_exp = cur->mbmi.nzcs[block - 16] << 5; + } + if ((boff & 15) == 0) { + int o = boff >> 4; + nzc_exp += + get_nzc_4x4_uv(cm, cur - 1 + o * mis, mb_row + o, mb_col - 1, + base_mb16 + 1) + + get_nzc_4x4_uv(cm, cur - 1 + o * mis, mb_row + o, mb_col - 1, + base_mb16 + 3); + } else { + nzc_exp += cur->mbmi.nzcs[block - 4] << 5; + } + // Note nzc_exp is 64 times the average value expected at 8x8 scale + return choose_nzc_context(nzc_exp, NZC_T2_8X8, NZC_T1_8X8); + + case TX_4X4: + if (boff < 8) { + int o = boff >> 1; + int p = boff & 1; + nzc_exp = get_nzc_4x4_uv(cm, cur - mis + o, mb_row - 1, mb_col + o, + base_mb16 + 2 + p); + } else { + nzc_exp = (cur->mbmi.nzcs[block - 8] << 6); + } + if ((boff & 7) == 0) { + int o = boff >> 4; + int p = (boff >> 3) & 1; + nzc_exp += get_nzc_4x4_uv(cm, cur - 1 + o * mis, mb_row + o, mb_col - 1, + base_mb16 + 1 + 2 * p); + } else { + nzc_exp += (cur->mbmi.nzcs[block - 1] << 6); + } + nzc_exp >>= 1; + // Note nzc_exp is 64 times the average value expected at 4x4 scale + return choose_nzc_context(nzc_exp, NZC_T2_4X4, NZC_T1_4X4); + + default: + return 0; + } +} + +int vp9_get_nzc_context_uv_sb32(VP9_COMMON *cm, MODE_INFO *cur, + int mb_row, int mb_col, int block) { + // returns an index in [0, MAX_NZC_CONTEXTS - 1] to reflect how busy + // neighboring blocks are + int mis = 
cm->mode_info_stride; + int nzc_exp = 0; + const int base = block - (block & 15); + const int boff = (block & 15); + const int base_mb16 = base >> 2; + TX_SIZE txfm_size = cur->mbmi.txfm_size; + TX_SIZE txfm_size_uv; + + assert(block >= 64 && block < 96); + if (txfm_size == TX_32X32) + txfm_size_uv = TX_16X16; + else + txfm_size_uv = txfm_size; + + switch (txfm_size_uv) { + case TX_16X16: + // uv txfm_size 16x16 + assert(block == 64 || block == 80); + nzc_exp = + get_nzc_4x4_uv(cm, cur - mis, mb_row - 1, mb_col, + base_mb16 + 2) + + get_nzc_4x4_uv(cm, cur - mis, mb_row - 1, mb_col, + base_mb16 + 3) + + get_nzc_4x4_uv(cm, cur - mis + 1, mb_row - 1, mb_col + 1, + base_mb16 + 2) + + get_nzc_4x4_uv(cm, cur - mis + 1, mb_row - 1, mb_col + 1, + base_mb16 + 3) + + get_nzc_4x4_uv(cm, cur - 1, mb_row, mb_col - 1, /* left MB, same row */ + base_mb16 + 1) + + get_nzc_4x4_uv(cm, cur - 1, mb_row, mb_col - 1, /* left MB, same row */ + base_mb16 + 3) + + get_nzc_4x4_uv(cm, cur - 1 + mis, mb_row + 1, mb_col - 1, + base_mb16 + 1) + + get_nzc_4x4_uv(cm, cur - 1 + mis, mb_row + 1, mb_col - 1, + base_mb16 + 3); + nzc_exp <<= 1; + // Note nzc_exp is 64 times the average value expected at 16x16 scale + return choose_nzc_context(nzc_exp, NZC_T2_16X16, NZC_T1_16X16); + break; + + case TX_8X8: + assert((block & 3) == 0); + if (boff < 8) { + int o = boff >> 2; + nzc_exp = + get_nzc_4x4_uv(cm, cur - mis + o, mb_row - 1, mb_col + o, + base_mb16 + 2) + + get_nzc_4x4_uv(cm, cur - mis + o, mb_row - 1, mb_col + o, + base_mb16 + 3); + } else { + nzc_exp = cur->mbmi.nzcs[block - 8] << 5; + } + if ((boff & 7) == 0) { + int o = boff >> 3; + nzc_exp += + get_nzc_4x4_uv(cm, cur - 1 + o * mis, mb_row + o, mb_col - 1, + base_mb16 + 1) + + get_nzc_4x4_uv(cm, cur - 1 + o * mis, mb_row + o, mb_col - 1, + base_mb16 + 3); + } else { + nzc_exp += cur->mbmi.nzcs[block - 4] << 5; + } + // Note nzc_exp is 64 times the average value expected at 8x8 scale + return choose_nzc_context(nzc_exp, NZC_T2_8X8, NZC_T1_8X8); + + case TX_4X4: + if (boff < 4) {
+ int o = boff >> 1; + int p = boff & 1; + nzc_exp = get_nzc_4x4_uv(cm, cur - mis + o, mb_row - 1, mb_col + o, + base_mb16 + 2 + p); + } else { + nzc_exp = (cur->mbmi.nzcs[block - 4] << 6); + } + if ((boff & 3) == 0) { + int o = boff >> 3; + int p = (boff >> 2) & 1; + nzc_exp += get_nzc_4x4_uv(cm, cur - 1 + o * mis, mb_row + o, mb_col - 1, + base_mb16 + 1 + 2 * p); + } else { + nzc_exp += (cur->mbmi.nzcs[block - 1] << 6); + } + nzc_exp >>= 1; + // Note nzc_exp is 64 times the average value expected at 4x4 scale + return choose_nzc_context(nzc_exp, NZC_T2_4X4, NZC_T1_4X4); + + default: + return 0; + } +} + +int vp9_get_nzc_context_uv_mb16(VP9_COMMON *cm, MODE_INFO *cur, + int mb_row, int mb_col, int block) { + // returns an index in [0, MAX_NZC_CONTEXTS - 1] to reflect how busy + // neighboring blocks are + int mis = cm->mode_info_stride; + int nzc_exp = 0; + const int base = block - (block & 3); + const int boff = (block & 3); + const int base_mb16 = base; + TX_SIZE txfm_size = cur->mbmi.txfm_size; + TX_SIZE txfm_size_uv; + + assert(block >= 16 && block < 24); + if (txfm_size == TX_16X16) + txfm_size_uv = TX_8X8; + else if (txfm_size == TX_8X8 && + (cur->mbmi.mode == I8X8_PRED || cur->mbmi.mode == SPLITMV)) + txfm_size_uv = TX_4X4; + else + txfm_size_uv = txfm_size; + + switch (txfm_size_uv) { + case TX_8X8: + assert((block & 3) == 0); + nzc_exp = + get_nzc_4x4_uv(cm, cur - mis, mb_row - 1, mb_col, base_mb16 + 2) + + get_nzc_4x4_uv(cm, cur - mis, mb_row - 1, mb_col, base_mb16 + 3) + + get_nzc_4x4_uv(cm, cur - 1, mb_row, mb_col - 1, base_mb16 + 1) + + get_nzc_4x4_uv(cm, cur - 1, mb_row, mb_col - 1, base_mb16 + 3); + // Note nzc_exp is 64 times the average value expected at 8x8 scale + return choose_nzc_context(nzc_exp, NZC_T2_8X8, NZC_T1_8X8); + + case TX_4X4: + if (boff < 2) { + int p = boff & 1; + nzc_exp = get_nzc_4x4_uv(cm, cur - mis, mb_row - 1, mb_col, + base_mb16 + 2 + p); + } else { + nzc_exp = (cur->mbmi.nzcs[block - 2] << 6); + } + if ((boff & 1) == 0) { + 
int p = (boff >> 1) & 1; + nzc_exp += get_nzc_4x4_uv(cm, cur - 1, mb_row, mb_col - 1, + base_mb16 + 1 + 2 * p); + } else { + nzc_exp += (cur->mbmi.nzcs[block - 1] << 6); + } + nzc_exp >>= 1; + // Note nzc_exp is 64 times the average value expected at 4x4 scale + return choose_nzc_context(nzc_exp, NZC_T2_4X4, NZC_T1_4X4); + + default: + return 0; + } +} + +int vp9_get_nzc_context(VP9_COMMON *cm, MACROBLOCKD *xd, int block) { + if (xd->mode_info_context->mbmi.sb_type == BLOCK_SIZE_SB64X64) { + assert(block < 384); + if (block < 256) + return vp9_get_nzc_context_y_sb64(cm, xd->mode_info_context, + get_mb_row(xd), get_mb_col(xd), block); + else + return vp9_get_nzc_context_uv_sb64(cm, xd->mode_info_context, + get_mb_row(xd), get_mb_col(xd), block); + } else if (xd->mode_info_context->mbmi.sb_type == BLOCK_SIZE_SB32X32) { + assert(block < 96); + if (block < 64) + return vp9_get_nzc_context_y_sb32(cm, xd->mode_info_context, + get_mb_row(xd), get_mb_col(xd), block); + else + return vp9_get_nzc_context_uv_sb32(cm, xd->mode_info_context, + get_mb_row(xd), get_mb_col(xd), block); + } else { + assert(block < 24); /* MB16: blocks 0..15 are Y, 16..23 are UV */ + if (block < 16) + return vp9_get_nzc_context_y_mb16(cm, xd->mode_info_context, + get_mb_row(xd), get_mb_col(xd), block); + else + return vp9_get_nzc_context_uv_mb16(cm, xd->mode_info_context, + get_mb_row(xd), get_mb_col(xd), block); + } +} + +static void update_nzc(VP9_COMMON *cm, + uint16_t nzc, + int nzc_context, + TX_SIZE tx_size, + int ref, + int type) { + int c; + c = codenzc(nzc); + if (tx_size == TX_32X32) + cm->fc.nzc_counts_32x32[nzc_context][ref][type][c]++; + else if (tx_size == TX_16X16) + cm->fc.nzc_counts_16x16[nzc_context][ref][type][c]++; + else if (tx_size == TX_8X8) + cm->fc.nzc_counts_8x8[nzc_context][ref][type][c]++; + else if (tx_size == TX_4X4) + cm->fc.nzc_counts_4x4[nzc_context][ref][type][c]++; + else + assert(0); + // TODO(debargha): Handle extra bits later if needed +} + +static void update_nzcs_sb64(VP9_COMMON *cm, + MACROBLOCKD *xd, +
int mb_row, + int mb_col) { + MODE_INFO *m = xd->mode_info_context; + MB_MODE_INFO *const mi = &m->mbmi; + int j, nzc_context; + const int ref = m->mbmi.ref_frame != INTRA_FRAME; + + assert(mb_col == get_mb_col(xd)); + assert(mb_row == get_mb_row(xd)); + + if (mi->mb_skip_coeff) + return; + + switch (mi->txfm_size) { + case TX_32X32: + for (j = 0; j < 256; j += 64) { + nzc_context = vp9_get_nzc_context_y_sb64(cm, m, mb_row, mb_col, j); + update_nzc(cm, m->mbmi.nzcs[j], nzc_context, TX_32X32, ref, 0); + } + for (j = 256; j < 384; j += 64) { + nzc_context = vp9_get_nzc_context_uv_sb64(cm, m, mb_row, mb_col, j); + update_nzc(cm, m->mbmi.nzcs[j], nzc_context, TX_32X32, ref, 1); + } + break; + + case TX_16X16: + for (j = 0; j < 256; j += 16) { + nzc_context = vp9_get_nzc_context_y_sb64(cm, m, mb_row, mb_col, j); + update_nzc(cm, m->mbmi.nzcs[j], nzc_context, TX_16X16, ref, 0); + } + for (j = 256; j < 384; j += 16) { + nzc_context = vp9_get_nzc_context_uv_sb64(cm, m, mb_row, mb_col, j); + update_nzc(cm, m->mbmi.nzcs[j], nzc_context, TX_16X16, ref, 1); + } + break; + + case TX_8X8: + for (j = 0; j < 256; j += 4) { + nzc_context = vp9_get_nzc_context_y_sb64(cm, m, mb_row, mb_col, j); + update_nzc(cm, m->mbmi.nzcs[j], nzc_context, TX_8X8, ref, 0); + } + for (j = 256; j < 384; j += 4) { + nzc_context = vp9_get_nzc_context_uv_sb64(cm, m, mb_row, mb_col, j); + update_nzc(cm, m->mbmi.nzcs[j], nzc_context, TX_8X8, ref, 1); + } + break; + + case TX_4X4: + for (j = 0; j < 256; ++j) { + nzc_context = vp9_get_nzc_context_y_sb64(cm, m, mb_row, mb_col, j); + update_nzc(cm, m->mbmi.nzcs[j], nzc_context, TX_4X4, ref, 0); + } + for (j = 256; j < 384; ++j) { + nzc_context = vp9_get_nzc_context_uv_sb64(cm, m, mb_row, mb_col, j); + update_nzc(cm, m->mbmi.nzcs[j], nzc_context, TX_4X4, ref, 1); + } + break; + + default: + break; + } +} + +static void update_nzcs_sb32(VP9_COMMON *cm, + MACROBLOCKD *xd, + int mb_row, + int mb_col) { + MODE_INFO *m = xd->mode_info_context; + MB_MODE_INFO *const 
mi = &m->mbmi; + int j, nzc_context; + const int ref = m->mbmi.ref_frame != INTRA_FRAME; + + assert(mb_col == get_mb_col(xd)); + assert(mb_row == get_mb_row(xd)); + + if (mi->mb_skip_coeff) + return; + + switch (mi->txfm_size) { + case TX_32X32: + for (j = 0; j < 64; j += 64) { + nzc_context = vp9_get_nzc_context_y_sb32(cm, m, mb_row, mb_col, j); + update_nzc(cm, m->mbmi.nzcs[j], nzc_context, TX_32X32, ref, 0); + } + for (j = 64; j < 96; j += 16) { + nzc_context = vp9_get_nzc_context_uv_sb32(cm, m, mb_row, mb_col, j); + update_nzc(cm, m->mbmi.nzcs[j], nzc_context, TX_16X16, ref, 1); + } + break; + + case TX_16X16: + for (j = 0; j < 64; j += 16) { + nzc_context = vp9_get_nzc_context_y_sb32(cm, m, mb_row, mb_col, j); + update_nzc(cm, m->mbmi.nzcs[j], nzc_context, TX_16X16, ref, 0); + } + for (j = 64; j < 96; j += 16) { + nzc_context = vp9_get_nzc_context_uv_sb32(cm, m, mb_row, mb_col, j); + update_nzc(cm, m->mbmi.nzcs[j], nzc_context, TX_16X16, ref, 1); + } + break; + + case TX_8X8: + for (j = 0; j < 64; j += 4) { + nzc_context = vp9_get_nzc_context_y_sb32(cm, m, mb_row, mb_col, j); + update_nzc(cm, m->mbmi.nzcs[j], nzc_context, TX_8X8, ref, 0); + } + for (j = 64; j < 96; j += 4) { + nzc_context = vp9_get_nzc_context_uv_sb32(cm, m, mb_row, mb_col, j); + update_nzc(cm, m->mbmi.nzcs[j], nzc_context, TX_8X8, ref, 1); + } + break; + + case TX_4X4: + for (j = 0; j < 64; ++j) { + nzc_context = vp9_get_nzc_context_y_sb32(cm, m, mb_row, mb_col, j); + update_nzc(cm, m->mbmi.nzcs[j], nzc_context, TX_4X4, ref, 0); + } + for (j = 64; j < 96; ++j) { + nzc_context = vp9_get_nzc_context_uv_sb32(cm, m, mb_row, mb_col, j); + update_nzc(cm, m->mbmi.nzcs[j], nzc_context, TX_4X4, ref, 1); + } + break; + + default: + break; + } +} + +static void update_nzcs_mb16(VP9_COMMON *cm, + MACROBLOCKD *xd, + int mb_row, + int mb_col) { + MODE_INFO *m = xd->mode_info_context; + MB_MODE_INFO *const mi = &m->mbmi; + int j, nzc_context; + const int ref = m->mbmi.ref_frame != INTRA_FRAME; + + 
assert(mb_col == get_mb_col(xd)); + assert(mb_row == get_mb_row(xd)); + + if (mi->mb_skip_coeff) + return; + + switch (mi->txfm_size) { + case TX_16X16: + for (j = 0; j < 16; j += 16) { + nzc_context = vp9_get_nzc_context_y_mb16(cm, m, mb_row, mb_col, j); + update_nzc(cm, m->mbmi.nzcs[j], nzc_context, TX_16X16, ref, 0); + } + for (j = 16; j < 24; j += 4) { + nzc_context = vp9_get_nzc_context_uv_mb16(cm, m, mb_row, mb_col, j); + update_nzc(cm, m->mbmi.nzcs[j], nzc_context, TX_8X8, ref, 1); + } + break; + + case TX_8X8: + for (j = 0; j < 16; j += 4) { + nzc_context = vp9_get_nzc_context_y_mb16(cm, m, mb_row, mb_col, j); + update_nzc(cm, m->mbmi.nzcs[j], nzc_context, TX_8X8, ref, 0); + } + if (mi->mode == I8X8_PRED || mi->mode == SPLITMV) { + for (j = 16; j < 24; ++j) { + nzc_context = vp9_get_nzc_context_uv_mb16(cm, m, mb_row, mb_col, j); + update_nzc(cm, m->mbmi.nzcs[j], nzc_context, TX_4X4, ref, 1); + } + } else { + for (j = 16; j < 24; j += 4) { + nzc_context = vp9_get_nzc_context_uv_mb16(cm, m, mb_row, mb_col, j); + update_nzc(cm, m->mbmi.nzcs[j], nzc_context, TX_8X8, ref, 1); + } + } + break; + + case TX_4X4: + for (j = 0; j < 16; ++j) { + nzc_context = vp9_get_nzc_context_y_mb16(cm, m, mb_row, mb_col, j); + update_nzc(cm, m->mbmi.nzcs[j], nzc_context, TX_4X4, ref, 0); + } + for (j = 16; j < 24; ++j) { + nzc_context = vp9_get_nzc_context_uv_mb16(cm, m, mb_row, mb_col, j); + update_nzc(cm, m->mbmi.nzcs[j], nzc_context, TX_4X4, ref, 1); + } + break; + + default: + break; + } +} + +void vp9_update_nzc_counts(VP9_COMMON *cm, + MACROBLOCKD *xd, + int mb_row, + int mb_col) { + if (xd->mode_info_context->mbmi.sb_type == BLOCK_SIZE_SB64X64) + update_nzcs_sb64(cm, xd, mb_row, mb_col); + else if (xd->mode_info_context->mbmi.sb_type == BLOCK_SIZE_SB32X32) + update_nzcs_sb32(cm, xd, mb_row, mb_col); + else + update_nzcs_mb16(cm, xd, mb_row, mb_col); +} +#endif // CONFIG_CODE_NONZEROCOUNT + // #define COEF_COUNT_TESTING #define COEF_COUNT_SAT 24 @@ -277,10 +1481,10 @@ void 
vp9_coef_tree_initialize() { #define COEF_COUNT_SAT_AFTER_KEY 24 #define COEF_MAX_UPDATE_FACTOR_AFTER_KEY 128 -static void update_coef_probs(vp9_coeff_probs *dst_coef_probs, - vp9_coeff_probs *pre_coef_probs, - int block_types, vp9_coeff_count *coef_counts, - int count_sat, int update_factor) { +static void adapt_coef_probs(vp9_coeff_probs *dst_coef_probs, + vp9_coeff_probs *pre_coef_probs, + int block_types, vp9_coeff_count *coef_counts, + int count_sat, int update_factor) { int t, i, j, k, l, count; unsigned int branch_ct[ENTROPY_NODES][2]; vp9_prob coef_probs[ENTROPY_NODES]; @@ -308,9 +1512,6 @@ static void update_coef_probs(vp9_coeff_probs *dst_coef_probs, } void vp9_adapt_coef_probs(VP9_COMMON *cm) { -#ifdef COEF_COUNT_TESTING - int t, i, j, k; -#endif int count_sat; int update_factor; /* denominator 256 */ @@ -326,16 +1527,121 @@ void vp9_adapt_coef_probs(VP9_COMMON *cm) { count_sat = COEF_COUNT_SAT; } - update_coef_probs(cm->fc.coef_probs_4x4, cm->fc.pre_coef_probs_4x4, - BLOCK_TYPES, cm->fc.coef_counts_4x4, - count_sat, update_factor); - update_coef_probs(cm->fc.coef_probs_8x8, cm->fc.pre_coef_probs_8x8, - BLOCK_TYPES, cm->fc.coef_counts_8x8, - count_sat, update_factor); - update_coef_probs(cm->fc.coef_probs_16x16, cm->fc.pre_coef_probs_16x16, - BLOCK_TYPES, cm->fc.coef_counts_16x16, - count_sat, update_factor); - update_coef_probs(cm->fc.coef_probs_32x32, cm->fc.pre_coef_probs_32x32, - BLOCK_TYPES, cm->fc.coef_counts_32x32, - count_sat, update_factor); + adapt_coef_probs(cm->fc.coef_probs_4x4, cm->fc.pre_coef_probs_4x4, + BLOCK_TYPES, cm->fc.coef_counts_4x4, + count_sat, update_factor); + adapt_coef_probs(cm->fc.coef_probs_8x8, cm->fc.pre_coef_probs_8x8, + BLOCK_TYPES, cm->fc.coef_counts_8x8, + count_sat, update_factor); + adapt_coef_probs(cm->fc.coef_probs_16x16, cm->fc.pre_coef_probs_16x16, + BLOCK_TYPES, cm->fc.coef_counts_16x16, + count_sat, update_factor); + adapt_coef_probs(cm->fc.coef_probs_32x32, cm->fc.pre_coef_probs_32x32, + BLOCK_TYPES, 
cm->fc.coef_counts_32x32, + count_sat, update_factor); } + +#if CONFIG_CODE_NONZEROCOUNT +static void adapt_nzc_probs(VP9_COMMON *cm, + int block_size, + int count_sat, + int update_factor) { + int c, r, b, n; + int count, factor; + unsigned int nzc_branch_ct[NZC32X32_NODES][2]; + vp9_prob nzc_probs[NZC32X32_NODES]; + int tokens, nodes; + const vp9_tree_index *nzc_tree; + const struct vp9_token_struct *nzc_encodings; + vp9_prob *dst_nzc_probs; + vp9_prob *pre_nzc_probs; + unsigned int *nzc_counts; + + if (block_size == 32) { + tokens = NZC32X32_TOKENS; + nzc_tree = vp9_nzc32x32_tree; + nzc_encodings = vp9_nzc32x32_encodings; + dst_nzc_probs = cm->fc.nzc_probs_32x32[0][0][0]; + pre_nzc_probs = cm->fc.pre_nzc_probs_32x32[0][0][0]; + nzc_counts = cm->fc.nzc_counts_32x32[0][0][0]; + } else if (block_size == 16) { + tokens = NZC16X16_TOKENS; + nzc_tree = vp9_nzc16x16_tree; + nzc_encodings = vp9_nzc16x16_encodings; + dst_nzc_probs = cm->fc.nzc_probs_16x16[0][0][0]; + pre_nzc_probs = cm->fc.pre_nzc_probs_16x16[0][0][0]; + nzc_counts = cm->fc.nzc_counts_16x16[0][0][0]; + } else if (block_size == 8) { + tokens = NZC8X8_TOKENS; + nzc_tree = vp9_nzc8x8_tree; + nzc_encodings = vp9_nzc8x8_encodings; + dst_nzc_probs = cm->fc.nzc_probs_8x8[0][0][0]; + pre_nzc_probs = cm->fc.pre_nzc_probs_8x8[0][0][0]; + nzc_counts = cm->fc.nzc_counts_8x8[0][0][0]; + } else { + nzc_tree = vp9_nzc4x4_tree; + nzc_encodings = vp9_nzc4x4_encodings; + tokens = NZC4X4_TOKENS; + dst_nzc_probs = cm->fc.nzc_probs_4x4[0][0][0]; + pre_nzc_probs = cm->fc.pre_nzc_probs_4x4[0][0][0]; + nzc_counts = cm->fc.nzc_counts_4x4[0][0][0]; + } + nodes = tokens - 1; + for (c = 0; c < MAX_NZC_CONTEXTS; ++c) + for (r = 0; r < REF_TYPES; ++r) + for (b = 0; b < BLOCK_TYPES; ++b) { + int offset = c * REF_TYPES * BLOCK_TYPES + r * BLOCK_TYPES + b; + int offset_nodes = offset * nodes; + int offset_tokens = offset * tokens; + vp9_tree_probs_from_distribution(tokens, + nzc_encodings, nzc_tree, + nzc_probs, nzc_branch_ct, + 
nzc_counts + offset_tokens); + for (n = 0; n < nodes; ++n) { + count = nzc_branch_ct[n][0] + nzc_branch_ct[n][1]; + count = count > count_sat ? count_sat : count; + factor = (update_factor * count / count_sat); + dst_nzc_probs[offset_nodes + n] = + weighted_prob(pre_nzc_probs[offset_nodes + n], + nzc_probs[n], factor); + } + } +} + +// #define NZC_COUNT_TESTING +void vp9_adapt_nzc_probs(VP9_COMMON *cm) { + int count_sat; + int update_factor; /* denominator 256 */ +#ifdef NZC_COUNT_TESTING + int c, r, b, t; + printf("\n"); + for (c = 0; c < MAX_NZC_CONTEXTS; ++c) + for (r = 0; r < REF_TYPES; ++r) { + for (b = 0; b < BLOCK_TYPES; ++b) { + printf(" {"); + for (t = 0; t < NZC4X4_TOKENS; ++t) { + printf(" %d,", cm->fc.nzc_counts_4x4[c][r][b][t]); + } + printf("}\n"); + } + printf("\n"); + } +#endif + + if (cm->frame_type == KEY_FRAME) { + update_factor = COEF_MAX_UPDATE_FACTOR_KEY; + count_sat = COEF_COUNT_SAT_KEY; + } else if (cm->last_frame_type == KEY_FRAME) { + update_factor = COEF_MAX_UPDATE_FACTOR_AFTER_KEY; /* adapt quickly */ + count_sat = COEF_COUNT_SAT_AFTER_KEY; + } else { + update_factor = COEF_MAX_UPDATE_FACTOR; + count_sat = COEF_COUNT_SAT; + } + + adapt_nzc_probs(cm, 4, count_sat, update_factor); + adapt_nzc_probs(cm, 8, count_sat, update_factor); + adapt_nzc_probs(cm, 16, count_sat, update_factor); + adapt_nzc_probs(cm, 32, count_sat, update_factor); +} +#endif // CONFIG_CODE_NONZEROCOUNT diff --git a/vp9/common/vp9_entropy.h b/vp9/common/vp9_entropy.h index 8d28b0058..25ba3c08d 100644 --- a/vp9/common/vp9_entropy.h +++ b/vp9/common/vp9_entropy.h @@ -142,4 +142,80 @@ static int get_coef_band(TX_SIZE tx_size, int coef_index) { } extern int vp9_get_coef_context(int * recent_energy, int token); +#if CONFIG_CODE_NONZEROCOUNT +/* Alphabet for number of non-zero symbols in block */ +#define NZC_0 0 /* Used for all blocks */ +#define NZC_1 1 /* Used for all blocks */ +#define NZC_2 2 /* Used for all blocks */ +#define NZC_3TO4 3 /* Used for all blocks */ 
+#define NZC_5TO8 4 /* Used for all blocks */ +#define NZC_9TO16 5 /* Used for all blocks */ +#define NZC_17TO32 6 /* Used for 8x8 and larger blocks */ +#define NZC_33TO64 7 /* Used for 8x8 and larger blocks */ +#define NZC_65TO128 8 /* Used for 16x16 and larger blocks */ +#define NZC_129TO256 9 /* Used for 16x16 and larger blocks */ +#define NZC_257TO512 10 /* Used for 32x32 and larger blocks */ +#define NZC_513TO1024 11 /* Used for 32x32 and larger blocks */ + +/* Number of tokens for each block size */ +#define NZC4X4_TOKENS 6 +#define NZC8X8_TOKENS 8 +#define NZC16X16_TOKENS 10 +#define NZC32X32_TOKENS 12 + +/* Number of nodes for each block size */ +#define NZC4X4_NODES 5 +#define NZC8X8_NODES 7 +#define NZC16X16_NODES 9 +#define NZC32X32_NODES 11 + +/* Max number of tokens with extra bits */ +#define NZC_TOKENS_EXTRA 9 + +/* Max number of extra bits */ +#define NZC_BITS_EXTRA 9 + +#define MAX_NZC_CONTEXTS 3 + +/* nzc trees */ +extern const vp9_tree_index vp9_nzc4x4_tree[]; +extern const vp9_tree_index vp9_nzc8x8_tree[]; +extern const vp9_tree_index vp9_nzc16x16_tree[]; +extern const vp9_tree_index vp9_nzc32x32_tree[]; + +/* nzc encodings */ +extern struct vp9_token_struct vp9_nzc4x4_encodings[NZC4X4_TOKENS]; +extern struct vp9_token_struct vp9_nzc8x8_encodings[NZC8X8_TOKENS]; +extern struct vp9_token_struct vp9_nzc16x16_encodings[NZC16X16_TOKENS]; +extern struct vp9_token_struct vp9_nzc32x32_encodings[NZC32X32_TOKENS]; + +#define codenzc(x) (\ + (x) <= 3 ? (x) : (x) <= 4 ? 3 : (x) <= 8 ? 4 : \ + (x) <= 16 ? 5 : (x) <= 32 ? 6 : (x) <= 64 ? 7 :\ + (x) <= 128 ? 8 : (x) <= 256 ? 9 : (x) <= 512 ? 10 : 11) +#define extranzcbits(c) ((c) <= 2 ? 0 : (c) - 2) +#define basenzcvalue(c) ((c) <= 2 ? 
(c) : (1 << ((c) - 2)) + 1) + +int vp9_get_nzc_context_y_sb64(struct VP9Common *cm, MODE_INFO *cur, + int mb_row, int mb_col, int block); +int vp9_get_nzc_context_y_sb32(struct VP9Common *cm, MODE_INFO *cur, + int mb_row, int mb_col, int block); +int vp9_get_nzc_context_y_mb16(struct VP9Common *cm, MODE_INFO *cur, + int mb_row, int mb_col, int block); +int vp9_get_nzc_context_uv_sb64(struct VP9Common *cm, MODE_INFO *cur, + int mb_row, int mb_col, int block); +int vp9_get_nzc_context_uv_sb32(struct VP9Common *cm, MODE_INFO *cur, + int mb_row, int mb_col, int block); +int vp9_get_nzc_context_uv_mb16(struct VP9Common *cm, MODE_INFO *cur, + int mb_row, int mb_col, int block); +int vp9_get_nzc_context(struct VP9Common *cm, MACROBLOCKD *xd, int block); +void vp9_update_nzc_counts(struct VP9Common *cm, MACROBLOCKD *xd, + int mb_row, int mb_col); +void vp9_adapt_nzc_probs(struct VP9Common *cm); + +/* Extra bit probabilities - block size agnostic */ +extern const vp9_prob Pcat_nzc[MAX_NZC_CONTEXTS][NZC_TOKENS_EXTRA] + [NZC_BITS_EXTRA]; + +#endif // CONFIG_CODE_NONZEROCOUNT #endif // VP9_COMMON_VP9_ENTROPY_H_ diff --git a/vp9/common/vp9_onyxc_int.h b/vp9/common/vp9_onyxc_int.h index 48d19a332..2cdb9c468 100644 --- a/vp9/common/vp9_onyxc_int.h +++ b/vp9/common/vp9_onyxc_int.h @@ -58,10 +58,21 @@ typedef struct frame_contexts { vp9_prob i8x8_mode_prob[VP9_I8X8_MODES - 1]; vp9_prob sub_mv_ref_prob[SUBMVREF_COUNT][VP9_SUBMVREFS - 1]; vp9_prob mbsplit_prob[VP9_NUMMBSPLITS - 1]; + vp9_coeff_probs coef_probs_4x4[BLOCK_TYPES]; vp9_coeff_probs coef_probs_8x8[BLOCK_TYPES]; vp9_coeff_probs coef_probs_16x16[BLOCK_TYPES]; vp9_coeff_probs coef_probs_32x32[BLOCK_TYPES]; +#if CONFIG_CODE_NONZEROCOUNT + vp9_prob nzc_probs_4x4[MAX_NZC_CONTEXTS][REF_TYPES][BLOCK_TYPES] + [NZC4X4_NODES]; + vp9_prob nzc_probs_8x8[MAX_NZC_CONTEXTS][REF_TYPES][BLOCK_TYPES] + [NZC8X8_NODES]; + vp9_prob nzc_probs_16x16[MAX_NZC_CONTEXTS][REF_TYPES][BLOCK_TYPES] + [NZC16X16_NODES]; + vp9_prob 
nzc_probs_32x32[MAX_NZC_CONTEXTS][REF_TYPES][BLOCK_TYPES] + [NZC32X32_NODES]; +#endif nmv_context nmvc; nmv_context pre_nmvc; @@ -84,11 +95,31 @@ typedef struct frame_contexts { vp9_coeff_probs pre_coef_probs_8x8[BLOCK_TYPES]; vp9_coeff_probs pre_coef_probs_16x16[BLOCK_TYPES]; vp9_coeff_probs pre_coef_probs_32x32[BLOCK_TYPES]; +#if CONFIG_CODE_NONZEROCOUNT + vp9_prob pre_nzc_probs_4x4[MAX_NZC_CONTEXTS][REF_TYPES][BLOCK_TYPES] + [NZC4X4_NODES]; + vp9_prob pre_nzc_probs_8x8[MAX_NZC_CONTEXTS][REF_TYPES][BLOCK_TYPES] + [NZC8X8_NODES]; + vp9_prob pre_nzc_probs_16x16[MAX_NZC_CONTEXTS][REF_TYPES][BLOCK_TYPES] + [NZC16X16_NODES]; + vp9_prob pre_nzc_probs_32x32[MAX_NZC_CONTEXTS][REF_TYPES][BLOCK_TYPES] + [NZC32X32_NODES]; +#endif vp9_coeff_count coef_counts_4x4[BLOCK_TYPES]; vp9_coeff_count coef_counts_8x8[BLOCK_TYPES]; vp9_coeff_count coef_counts_16x16[BLOCK_TYPES]; vp9_coeff_count coef_counts_32x32[BLOCK_TYPES]; +#if CONFIG_CODE_NONZEROCOUNT + unsigned int nzc_counts_4x4[MAX_NZC_CONTEXTS][REF_TYPES][BLOCK_TYPES] + [NZC4X4_TOKENS]; + unsigned int nzc_counts_8x8[MAX_NZC_CONTEXTS][REF_TYPES][BLOCK_TYPES] + [NZC8X8_TOKENS]; + unsigned int nzc_counts_16x16[MAX_NZC_CONTEXTS][REF_TYPES][BLOCK_TYPES] + [NZC16X16_TOKENS]; + unsigned int nzc_counts_32x32[MAX_NZC_CONTEXTS][REF_TYPES][BLOCK_TYPES] + [NZC32X32_TOKENS]; +#endif nmv_context_counts NMVcount; vp9_prob switchable_interp_prob[VP9_SWITCHABLE_FILTERS + 1] @@ -300,4 +331,31 @@ static void ref_cnt_fb(int *buf, int *idx, int new_idx) { buf[new_idx]++; } +// TODO(debargha): merge the two functions +static void set_mb_row(VP9_COMMON *cm, MACROBLOCKD *xd, + int mb_row, int block_size) { + xd->mb_to_top_edge = -((mb_row * 16) << 3); + xd->mb_to_bottom_edge = ((cm->mb_rows - block_size - mb_row) * 16) << 3; + + // Are edges available for intra prediction? 
+ xd->up_available = (mb_row != 0); +} + +static void set_mb_col(VP9_COMMON *cm, MACROBLOCKD *xd, + int mb_col, int block_size) { + xd->mb_to_left_edge = -((mb_col * 16) << 3); + xd->mb_to_right_edge = ((cm->mb_cols - block_size - mb_col) * 16) << 3; + + // Are edges available for intra prediction? + xd->left_available = (mb_col > cm->cur_tile_mb_col_start); + xd->right_available = (mb_col + block_size < cm->cur_tile_mb_col_end); +} + +static int get_mb_row(const MACROBLOCKD *xd) { + return ((-xd->mb_to_top_edge) >> 7); +} + +static int get_mb_col(const MACROBLOCKD *xd) { + return ((-xd->mb_to_left_edge) >> 7); +} #endif // VP9_COMMON_VP9_ONYXC_INT_H_ diff --git a/vp9/decoder/vp9_decodemv.c b/vp9/decoder/vp9_decodemv.c index 326c80239..89dcdc09d 100644 --- a/vp9/decoder/vp9_decodemv.c +++ b/vp9/decoder/vp9_decodemv.c @@ -173,7 +173,6 @@ static void kfread_modes(VP9D_COMP *pbi, m->mbmi.mb_skip_coeff = 0; } - y_mode = m->mbmi.sb_type ? read_kf_sb_ymode(bc, pbi->common.sb_kf_ymode_prob[pbi->common.kf_ymode_probs_index]): @@ -677,22 +676,23 @@ static void read_mb_modes_mv(VP9D_COMP *pbi, MODE_INFO *mi, MB_MODE_INFO *mbmi, mbmi->need_to_clamp_secondmv = 0; mbmi->second_ref_frame = NONE; - // Distance of Mb to the various image edges. - // These specified to 8th pel as they are always compared to MV values - // that are in 1/8th pel units - xd->mb_to_left_edge = mb_to_left_edge - = -((mb_col * 16) << 3); - mb_to_left_edge -= LEFT_TOP_MARGIN; - - xd->mb_to_right_edge = mb_to_right_edge - = ((pbi->common.mb_cols - mb_size - mb_col) * 16) << 3; - mb_to_right_edge += RIGHT_BOTTOM_MARGIN; - // Make sure the MACROBLOCKD mode info pointer is pointed at the // correct entry for the current macroblock. xd->mode_info_context = mi; xd->prev_mode_info_context = prev_mi; + // Distance of Mb to the various image edges. 
+ // These specified to 8th pel as they are always compared to MV values + // that are in 1/8th pel units + set_mb_row(cm, xd, mb_row, mb_size); + set_mb_col(cm, xd, mb_col, mb_size); + + mb_to_left_edge = xd->mb_to_left_edge; + mb_to_left_edge -= LEFT_TOP_MARGIN; + + mb_to_right_edge = xd->mb_to_right_edge; + mb_to_right_edge += RIGHT_BOTTOM_MARGIN; + // Read the macroblock segment id. read_mb_segment_id(pbi, mb_row, mb_col, bc); @@ -750,17 +750,6 @@ static void read_mb_modes_mv(VP9D_COMP *pbi, MODE_INFO *mi, MB_MODE_INFO *mbmi, vp9_mv_ref_probs(&pbi->common, mv_ref_p, mbmi->mb_mode_context[ref_frame]); - /* - if (pbi->common.current_video_frame == 1) { - int k = mbmi->mb_mode_context[ref_frame]; - printf("vp9_mode_contexts: [%d %d %d %d] %d %d %d %d\n", - mb_row, mb_col, ref_frame, k, - cm->fc.vp9_mode_contexts[k][0], - cm->fc.vp9_mode_contexts[k][1], - cm->fc.vp9_mode_contexts[k][2], - cm->fc.vp9_mode_contexts[k][3]); - } - */ // If the segment level skip mode enabled if (vp9_segfeature_active(xd, mbmi->segment_id, SEG_LVL_SKIP)) { @@ -1176,20 +1165,270 @@ void vp9_decode_mode_mvs_init(VP9D_COMP* const pbi, BOOL_DECODER* const bc) { vpx_memset(cm->mbskip_pred_probs, 0, sizeof(cm->mbskip_pred_probs)); if (pbi->common.mb_no_coeff_skip) { int k; - for (k = 0; k < MBSKIP_CONTEXTS; ++k) + for (k = 0; k < MBSKIP_CONTEXTS; ++k) { cm->mbskip_pred_probs[k] = (vp9_prob)vp9_read_literal(bc, 8); + } } mb_mode_mv_init(pbi, bc); } +#if CONFIG_CODE_NONZEROCOUNT +static uint16_t read_nzc(VP9_COMMON *const cm, + int nzc_context, + TX_SIZE tx_size, + int ref, + int type, + BOOL_DECODER* const bc) { + int c = 0, e; /* c stays defined when assert(0) is compiled out */ + uint16_t nzc; + if (tx_size == TX_32X32) { + c = treed_read(bc, vp9_nzc32x32_tree, + cm->fc.nzc_probs_32x32[nzc_context][ref][type]); + cm->fc.nzc_counts_32x32[nzc_context][ref][type][c]++; + } else if (tx_size == TX_16X16) { + c = treed_read(bc, vp9_nzc16x16_tree, + cm->fc.nzc_probs_16x16[nzc_context][ref][type]); + cm->fc.nzc_counts_16x16[nzc_context][ref][type][c]++; +
} else if (tx_size == TX_8X8) { + c = treed_read(bc, vp9_nzc8x8_tree, + cm->fc.nzc_probs_8x8[nzc_context][ref][type]); + cm->fc.nzc_counts_8x8[nzc_context][ref][type][c]++; + } else if (tx_size == TX_4X4) { + c = treed_read(bc, vp9_nzc4x4_tree, + cm->fc.nzc_probs_4x4[nzc_context][ref][type]); + cm->fc.nzc_counts_4x4[nzc_context][ref][type][c]++; + } else { + assert(0); + } + nzc = basenzcvalue(c); + if ((e = extranzcbits(c))) { + int x = 0; + while (e--) + x |= (vp9_read(bc, Pcat_nzc[nzc_context][c - 3][e]) << e); + nzc += x; + } + if (tx_size == TX_32X32) + assert(nzc <= 1024); + else if (tx_size == TX_16X16) + assert(nzc <= 256); + else if (tx_size == TX_8X8) + assert(nzc <= 64); + else if (tx_size == TX_4X4) + assert(nzc <= 16); + return nzc; +} + +static void read_nzcs_sb64(VP9_COMMON *const cm, + MACROBLOCKD* xd, + int mb_row, + int mb_col, + BOOL_DECODER* const bc) { /* Reads the nonzero counts of a 64x64 superblock into m->mbmi.nzcs[]. Entries 0..255 use the y context and pass 0 after ref, entries 256..383 use the uv context and pass 1; j advances by 64/16/4/1 for TX_32X32/16X16/8X8/4X4, so only the first entry of each transform block is filled. */ + MODE_INFO *m = xd->mode_info_context; + MB_MODE_INFO *const mi = &m->mbmi; + int j, nzc_context; + const int ref = m->mbmi.ref_frame != INTRA_FRAME; + + assert(mb_col == get_mb_col(xd)); + assert(mb_row == get_mb_row(xd)); + + vpx_memset(m->mbmi.nzcs, 0, 384 * sizeof(m->mbmi.nzcs[0])); /* cleared before the skip test, so a skipped block ends up with all-zero counts */ + + if (mi->mb_skip_coeff) + return; + + switch (mi->txfm_size) { + case TX_32X32: + for (j = 0; j < 256; j += 64) { + nzc_context = vp9_get_nzc_context_y_sb64(cm, m, mb_row, mb_col, j); + m->mbmi.nzcs[j] = read_nzc(cm, nzc_context, TX_32X32, ref, 0, bc); + } + for (j = 256; j < 384; j += 64) { + nzc_context = vp9_get_nzc_context_uv_sb64(cm, m, mb_row, mb_col, j); + m->mbmi.nzcs[j] = read_nzc(cm, nzc_context, TX_32X32, ref, 1, bc); + } + break; + + case TX_16X16: + for (j = 0; j < 256; j += 16) { + nzc_context = vp9_get_nzc_context_y_sb64(cm, m, mb_row, mb_col, j); + m->mbmi.nzcs[j] = read_nzc(cm, nzc_context, TX_16X16, ref, 0, bc); + } + for (j = 256; j < 384; j += 16) { + nzc_context = vp9_get_nzc_context_uv_sb64(cm, m, mb_row, mb_col, j); + m->mbmi.nzcs[j] = read_nzc(cm, nzc_context, TX_16X16, ref, 1, bc); + } 
+ break; + + case TX_8X8: + for (j = 0; j < 256; j += 4) { + nzc_context = vp9_get_nzc_context_y_sb64(cm, m, mb_row, mb_col, j); + m->mbmi.nzcs[j] = read_nzc(cm, nzc_context, TX_8X8, ref, 0, bc); + } + for (j = 256; j < 384; j += 4) { + nzc_context = vp9_get_nzc_context_uv_sb64(cm, m, mb_row, mb_col, j); + m->mbmi.nzcs[j] = read_nzc(cm, nzc_context, TX_8X8, ref, 1, bc); + } + break; + + case TX_4X4: + for (j = 0; j < 256; ++j) { + nzc_context = vp9_get_nzc_context_y_sb64(cm, m, mb_row, mb_col, j); + m->mbmi.nzcs[j] = read_nzc(cm, nzc_context, TX_4X4, ref, 0, bc); + } + for (j = 256; j < 384; ++j) { + nzc_context = vp9_get_nzc_context_uv_sb64(cm, m, mb_row, mb_col, j); + m->mbmi.nzcs[j] = read_nzc(cm, nzc_context, TX_4X4, ref, 1, bc); + } + break; + + default: + break; + } +} + +static void read_nzcs_sb32(VP9_COMMON *const cm, + MACROBLOCKD* xd, + int mb_row, + int mb_col, + BOOL_DECODER* const bc) { /* Reads the nonzero counts of a 32x32 superblock into m->mbmi.nzcs[]: entries 0..63 with the y context, entries 64..95 with the uv context. With TX_32X32 the uv entries are read as TX_16X16 (stride 16). */ + MODE_INFO *m = xd->mode_info_context; + MB_MODE_INFO *const mi = &m->mbmi; + int j, nzc_context; + const int ref = m->mbmi.ref_frame != INTRA_FRAME; + + assert(mb_col == get_mb_col(xd)); + assert(mb_row == get_mb_row(xd)); + + vpx_memset(m->mbmi.nzcs, 0, 384 * sizeof(m->mbmi.nzcs[0])); + + if (mi->mb_skip_coeff) + return; + + switch (mi->txfm_size) { + case TX_32X32: + for (j = 0; j < 64; j += 64) { + nzc_context = vp9_get_nzc_context_y_sb32(cm, m, mb_row, mb_col, j); + m->mbmi.nzcs[j] = read_nzc(cm, nzc_context, TX_32X32, ref, 0, bc); + } + for (j = 64; j < 96; j += 16) { + nzc_context = vp9_get_nzc_context_uv_sb32(cm, m, mb_row, mb_col, j); + m->mbmi.nzcs[j] = read_nzc(cm, nzc_context, TX_16X16, ref, 1, bc); + } + break; + + case TX_16X16: + for (j = 0; j < 64; j += 16) { + nzc_context = vp9_get_nzc_context_y_sb32(cm, m, mb_row, mb_col, j); + m->mbmi.nzcs[j] = read_nzc(cm, nzc_context, TX_16X16, ref, 0, bc); + } + for (j = 64; j < 96; j += 16) { + nzc_context = vp9_get_nzc_context_uv_sb32(cm, m, mb_row, mb_col, j); + m->mbmi.nzcs[j] = read_nzc(cm, nzc_context, 
TX_16X16, ref, 1, bc); + } + break; + + case TX_8X8: + for (j = 0; j < 64; j += 4) { + nzc_context = vp9_get_nzc_context_y_sb32(cm, m, mb_row, mb_col, j); + m->mbmi.nzcs[j] = read_nzc(cm, nzc_context, TX_8X8, ref, 0, bc); + } + for (j = 64; j < 96; j += 4) { + nzc_context = vp9_get_nzc_context_uv_sb32(cm, m, mb_row, mb_col, j); + m->mbmi.nzcs[j] = read_nzc(cm, nzc_context, TX_8X8, ref, 1, bc); + } + break; + + case TX_4X4: + for (j = 0; j < 64; ++j) { + nzc_context = vp9_get_nzc_context_y_sb32(cm, m, mb_row, mb_col, j); + m->mbmi.nzcs[j] = read_nzc(cm, nzc_context, TX_4X4, ref, 0, bc); + } + for (j = 64; j < 96; ++j) { + nzc_context = vp9_get_nzc_context_uv_sb32(cm, m, mb_row, mb_col, j); + m->mbmi.nzcs[j] = read_nzc(cm, nzc_context, TX_4X4, ref, 1, bc); + } + break; + + default: + break; + } +} + +static void read_nzcs_mb16(VP9_COMMON *const cm, + MACROBLOCKD* xd, + int mb_row, + int mb_col, + BOOL_DECODER* const bc) { /* Reads the nonzero counts of a single 16x16 macroblock into m->mbmi.nzcs[]: entries 0..15 with the y context, entries 16..23 with the uv context. With TX_16X16 the uv entries are read as TX_8X8; with TX_8X8 they are read per entry as TX_4X4 when the mode is I8X8_PRED or SPLITMV. */ + MODE_INFO *m = xd->mode_info_context; + MB_MODE_INFO *const mi = &m->mbmi; + int j, nzc_context; + const int ref = m->mbmi.ref_frame != INTRA_FRAME; + + assert(mb_col == get_mb_col(xd)); + assert(mb_row == get_mb_row(xd)); + + vpx_memset(m->mbmi.nzcs, 0, 384 * sizeof(m->mbmi.nzcs[0])); + + if (mi->mb_skip_coeff) + return; + + switch (mi->txfm_size) { + case TX_16X16: + for (j = 0; j < 16; j += 16) { + nzc_context = vp9_get_nzc_context_y_mb16(cm, m, mb_row, mb_col, j); + m->mbmi.nzcs[j] = read_nzc(cm, nzc_context, TX_16X16, ref, 0, bc); + } + for (j = 16; j < 24; j += 4) { + nzc_context = vp9_get_nzc_context_uv_mb16(cm, m, mb_row, mb_col, j); + m->mbmi.nzcs[j] = read_nzc(cm, nzc_context, TX_8X8, ref, 1, bc); + } + break; + + case TX_8X8: + for (j = 0; j < 16; j += 4) { + nzc_context = vp9_get_nzc_context_y_mb16(cm, m, mb_row, mb_col, j); + m->mbmi.nzcs[j] = read_nzc(cm, nzc_context, TX_8X8, ref, 0, bc); + } + if (mi->mode == I8X8_PRED || mi->mode == SPLITMV) { + for (j = 16; j < 24; ++j) { + nzc_context = vp9_get_nzc_context_uv_mb16(cm, m, mb_row, 
mb_col, j); + m->mbmi.nzcs[j] = read_nzc(cm, nzc_context, TX_4X4, ref, 1, bc); + } + } else { + for (j = 16; j < 24; j += 4) { + nzc_context = vp9_get_nzc_context_uv_mb16(cm, m, mb_row, mb_col, j); + m->mbmi.nzcs[j] = read_nzc(cm, nzc_context, TX_8X8, ref, 1, bc); + } + } + break; + + case TX_4X4: + for (j = 0; j < 16; ++j) { + nzc_context = vp9_get_nzc_context_y_mb16(cm, m, mb_row, mb_col, j); + m->mbmi.nzcs[j] = read_nzc(cm, nzc_context, TX_4X4, ref, 0, bc); + } + for (j = 16; j < 24; ++j) { + nzc_context = vp9_get_nzc_context_uv_mb16(cm, m, mb_row, mb_col, j); + m->mbmi.nzcs[j] = read_nzc(cm, nzc_context, TX_4X4, ref, 1, bc); + } + break; + + default: + break; + } +} +#endif // CONFIG_CODE_NONZEROCOUNT + void vp9_decode_mb_mode_mv(VP9D_COMP* const pbi, MACROBLOCKD* const xd, int mb_row, int mb_col, BOOL_DECODER* const bc) { + VP9_COMMON *const cm = &pbi->common; MODE_INFO *mi = xd->mode_info_context; MODE_INFO *prev_mi = xd->prev_mode_info_context; + MB_MODE_INFO *const mbmi = &mi->mbmi; if (pbi->common.frame_type == KEY_FRAME) { kfread_modes(pbi, mi, mb_row, mb_col, bc); @@ -1199,4 +1438,28 @@ void vp9_decode_mb_mode_mv(VP9D_COMP* const pbi, mi->mbmi.ref_frame - 1, mi->mbmi.second_ref_frame - 1, pbi->common.active_ref_scale); } +#if CONFIG_CODE_NONZEROCOUNT + if (mbmi->sb_type == BLOCK_SIZE_SB64X64) + read_nzcs_sb64(cm, xd, mb_row, mb_col, bc); + else if (mbmi->sb_type == BLOCK_SIZE_SB32X32) + read_nzcs_sb32(cm, xd, mb_row, mb_col, bc); + else + read_nzcs_mb16(cm, xd, mb_row, mb_col, bc); +#endif // CONFIG_CODE_NONZEROCOUNT + + if (mbmi->sb_type) { + const int n_mbs = 1 << mbmi->sb_type; + const int y_mbs = MIN(n_mbs, cm->mb_rows - mb_row); + const int x_mbs = MIN(n_mbs, cm->mb_cols - mb_col); + const int mis = cm->mode_info_stride; + int x, y; + + for (y = 0; y < y_mbs; y++) { + for (x = !y; x < x_mbs; x++) { /* x starts at 1 on row 0, so *mi is never copied onto itself */ + mi[y * mis + x] = *mi; + } + } + } else { + update_blockd_bmi(xd); + } } diff --git a/vp9/decoder/vp9_decodframe.c b/vp9/decoder/vp9_decodframe.c index 
b44d6595b..8dfb3e851 100644 --- a/vp9/decoder/vp9_decodframe.c +++ b/vp9/decoder/vp9_decodframe.c @@ -39,7 +39,7 @@ #define COEFCOUNT_TESTING -//#define DEC_DEBUG +// #define DEC_DEBUG #ifdef DEC_DEBUG int dec_debug = 0; #endif @@ -246,7 +246,7 @@ static void decode_8x8(VP9D_COMP *pbi, MACROBLOCKD *xd, int i; printf("\n"); printf("qcoeff 8x8\n"); - for (i = 0; i < 400; i++) { + for (i = 0; i < 384; i++) { printf("%3d ", xd->qcoeff[i]); if (i % 16 == 15) printf("\n"); } @@ -862,14 +862,9 @@ static void set_offsets(VP9D_COMP *pbi, int block_size, * values that are in 1/8th pel units */ block_size >>= 4; // in mb units - xd->mb_to_top_edge = -((mb_row * 16)) << 3; - xd->mb_to_left_edge = -((mb_col * 16) << 3); - xd->mb_to_bottom_edge = ((cm->mb_rows - block_size - mb_row) * 16) << 3; - xd->mb_to_right_edge = ((cm->mb_cols - block_size - mb_col) * 16) << 3; - xd->up_available = (mb_row != 0); - xd->left_available = (mb_col > cm->cur_tile_mb_col_start); - xd->right_available = (mb_col + block_size < cm->cur_tile_mb_col_end); + set_mb_row(cm, xd, mb_row, block_size); + set_mb_col(cm, xd, mb_col, block_size); xd->dst.y_buffer = cm->yv12_fb[dst_fb_idx].y_buffer + recon_yoffset; xd->dst.u_buffer = cm->yv12_fb[dst_fb_idx].u_buffer + recon_uvoffset; @@ -910,20 +905,6 @@ static void set_refs(VP9D_COMP *pbi, int block_size, xd->corrupted |= cm->yv12_fb[second_ref_fb_idx].corrupted; } } - - if (mbmi->sb_type) { - const int n_mbs = 1 << mbmi->sb_type; - const int y_mbs = MIN(n_mbs, cm->mb_rows - mb_row); - const int x_mbs = MIN(n_mbs, cm->mb_cols - mb_col); - const int mis = cm->mode_info_stride; - int x, y; - - for (y = 0; y < y_mbs; y++) { - for (x = !y; x < x_mbs; x++) { - mi[y * mis + x] = *mi; - } - } - } } /* Decode a row of Superblocks (2x2 region of MBs) */ @@ -938,6 +919,11 @@ static void decode_sb_row(VP9D_COMP *pbi, VP9_COMMON *pc, for (mb_col = pc->cur_tile_mb_col_start; mb_col < pc->cur_tile_mb_col_end; mb_col += 4) { if (vp9_read(bc, pc->sb64_coded)) { +#ifdef 
DEC_DEBUG + dec_debug = (pc->current_video_frame == 1 && mb_row == 0 && mb_col == 0); + if (dec_debug) + printf("Debug\n"); +#endif set_offsets(pbi, 64, mb_row, mb_col); vp9_decode_mb_mode_mv(pbi, xd, mb_row, mb_col, bc); set_refs(pbi, 64, mb_row, mb_col); @@ -958,6 +944,10 @@ static void decode_sb_row(VP9D_COMP *pbi, VP9_COMMON *pc, xd->sb_index = j; if (vp9_read(bc, pc->sb32_coded)) { +#ifdef DEC_DEBUG + dec_debug = (pc->current_video_frame == 1 && + mb_row + y_idx_sb == 0 && mb_col + x_idx_sb == 0); +#endif set_offsets(pbi, 32, mb_row + y_idx_sb, mb_col + x_idx_sb); vp9_decode_mb_mode_mv(pbi, xd, mb_row + y_idx_sb, mb_col + x_idx_sb, bc); @@ -978,11 +968,14 @@ static void decode_sb_row(VP9D_COMP *pbi, VP9_COMMON *pc, // MB lies outside frame, skip on to next continue; } +#ifdef DEC_DEBUG + dec_debug = (pc->current_video_frame == 1 && + mb_row + y_idx == 0 && mb_col + x_idx == 0); +#endif set_offsets(pbi, 16, mb_row + y_idx, mb_col + x_idx); xd->mb_index = i; vp9_decode_mb_mode_mv(pbi, xd, mb_row + y_idx, mb_col + x_idx, bc); - update_blockd_bmi(xd); set_refs(pbi, 16, mb_row + y_idx, mb_col + x_idx); decode_macroblock(pbi, xd, mb_row + y_idx, mb_col + x_idx, bc); @@ -1073,6 +1066,63 @@ static void init_frame(VP9D_COMP *pbi) { xd->fullpixel_mask = 0xfffffff8; } +#if CONFIG_CODE_NONZEROCOUNT +static void read_nzc_probs_common(VP9_COMMON *cm, + BOOL_DECODER* const bc, + int block_size) { /* Reads the NZC probability updates for one transform size (block_size 32, 16 or 8; any other value is handled as 4x4). One leading bit says whether any update follows; if so, each tree node of every [context][ref][block type] entry gets an update flag coded with upd and, when the flag is set, a new probability from read_prob_diff_update(). The 4-D probability table is walked as a flat array. */ + int c, r, b, t; + int tokens, nodes; + vp9_prob *nzc_probs; + vp9_prob upd; + + if (!vp9_read_bit(bc)) return; + + if (block_size == 32) { + tokens = NZC32X32_TOKENS; + nzc_probs = cm->fc.nzc_probs_32x32[0][0][0]; + upd = NZC_UPDATE_PROB_32X32; + } else if (block_size == 16) { + tokens = NZC16X16_TOKENS; + nzc_probs = cm->fc.nzc_probs_16x16[0][0][0]; + upd = NZC_UPDATE_PROB_16X16; + } else if (block_size == 8) { + tokens = NZC8X8_TOKENS; + nzc_probs = cm->fc.nzc_probs_8x8[0][0][0]; + upd = NZC_UPDATE_PROB_8X8; + } else { + tokens = NZC4X4_TOKENS; + nzc_probs = 
cm->fc.nzc_probs_4x4[0][0][0]; + upd = NZC_UPDATE_PROB_4X4; + } + nodes = tokens - 1; /* the loops below visit `nodes` probabilities per table entry */ + for (c = 0; c < MAX_NZC_CONTEXTS; ++c) { + for (r = 0; r < REF_TYPES; ++r) { + for (b = 0; b < BLOCK_TYPES; ++b) { + int offset = c * REF_TYPES * BLOCK_TYPES + r * BLOCK_TYPES + b; + int offset_nodes = offset * nodes; + for (t = 0; t < nodes; ++t) { + vp9_prob *p = &nzc_probs[offset_nodes + t]; + if (vp9_read(bc, upd)) { + *p = read_prob_diff_update(bc, *p); + } + } + } + } + } +} + +static void read_nzc_probs(VP9_COMMON *cm, + BOOL_DECODER* const bc) { /* Reads the 4x4 updates unconditionally; 8x8, 16x16 and 32x32 only when cm->txfm_mode is beyond ONLY_4X4, ALLOW_8X8 and ALLOW_16X16 respectively. */ + read_nzc_probs_common(cm, bc, 4); + if (cm->txfm_mode != ONLY_4X4) + read_nzc_probs_common(cm, bc, 8); + if (cm->txfm_mode > ALLOW_8X8) + read_nzc_probs_common(cm, bc, 16); + if (cm->txfm_mode > ALLOW_16X16) + read_nzc_probs_common(cm, bc, 32); +} +#endif // CONFIG_CODE_NONZEROCOUNT + static void read_coef_probs_common(BOOL_DECODER* const bc, vp9_coeff_probs *coef_probs, int block_types) { @@ -1085,7 +1135,7 @@ static void read_coef_probs_common(BOOL_DECODER* const bc, for (l = 0; l < PREV_COEF_CONTEXTS; l++) { if (l >= 3 && k == 0) continue; - for (m = 0; m < ENTROPY_NODES; m++) { + for (m = CONFIG_CODE_NONZEROCOUNT; m < ENTROPY_NODES; m++) { vp9_prob *const p = coef_probs[i][j][k][l] + m; if (vp9_read(bc, COEF_UPDATE_PROB)) { @@ -1539,6 +1589,17 @@ int vp9_decode_frame(VP9D_COMP *pbi, const unsigned char **p_data_end) { pbi->common.fc.pre_interintra_prob = pbi->common.fc.interintra_prob; #endif pbi->common.fc.pre_nmvc = pbi->common.fc.nmvc; +#if CONFIG_CODE_NONZEROCOUNT + vp9_copy(pbi->common.fc.pre_nzc_probs_4x4, + pbi->common.fc.nzc_probs_4x4); + vp9_copy(pbi->common.fc.pre_nzc_probs_8x8, + pbi->common.fc.nzc_probs_8x8); + vp9_copy(pbi->common.fc.pre_nzc_probs_16x16, + pbi->common.fc.nzc_probs_16x16); + vp9_copy(pbi->common.fc.pre_nzc_probs_32x32, + pbi->common.fc.nzc_probs_32x32); +#endif + vp9_zero(pbi->common.fc.coef_counts_4x4); vp9_zero(pbi->common.fc.coef_counts_8x8); vp9_zero(pbi->common.fc.coef_counts_16x16); @@ 
-1555,8 +1616,17 @@ int vp9_decode_frame(VP9D_COMP *pbi, const unsigned char **p_data_end) { #if CONFIG_COMP_INTERINTRA_PRED vp9_zero(pbi->common.fc.interintra_counts); #endif +#if CONFIG_CODE_NONZEROCOUNT + vp9_zero(pbi->common.fc.nzc_counts_4x4); + vp9_zero(pbi->common.fc.nzc_counts_8x8); + vp9_zero(pbi->common.fc.nzc_counts_16x16); + vp9_zero(pbi->common.fc.nzc_counts_32x32); +#endif read_coef_probs(pbi, &header_bc); +#if CONFIG_CODE_NONZEROCOUNT + read_nzc_probs(&pbi->common, &header_bc); +#endif /* Initialize xd pointers. Any reference should do for xd->pre, so use 0. */ vpx_memcpy(&xd->pre, &pc->yv12_fb[pc->active_ref_idx[0]], @@ -1700,8 +1770,12 @@ int vp9_decode_frame(VP9D_COMP *pbi, const unsigned char **p_data_end) { } if (!pc->error_resilient_mode && - !pc->frame_parallel_decoding_mode) + !pc->frame_parallel_decoding_mode) { vp9_adapt_coef_probs(pc); +#if CONFIG_CODE_NONZEROCOUNT + vp9_adapt_nzc_probs(pc); +#endif + } if (pc->frame_type != KEY_FRAME) { if (!pc->error_resilient_mode && !pc->frame_parallel_decoding_mode) { diff --git a/vp9/decoder/vp9_detokenize.c b/vp9/decoder/vp9_detokenize.c index a192266ef..a53edfc3c 100644 --- a/vp9/decoder/vp9_detokenize.c +++ b/vp9/decoder/vp9_detokenize.c @@ -69,13 +69,24 @@ static int get_signed(BOOL_DECODER *br, int value_to_sign) { pt = vp9_get_coef_context(&recent_energy, token); \ } while (0) +#if CONFIG_CODE_NONZEROCOUNT #define WRITE_COEF_CONTINUE(val, token) \ { \ - qcoeff_ptr[scan[c]] = (int16_t) get_signed(br, val); \ + qcoeff_ptr[scan[c]] = (int16_t) get_signed(br, val); \ + INCREMENT_COUNT(token); \ + c++; \ + nzc++; \ + continue; \ + } +#else +#define WRITE_COEF_CONTINUE(val, token) \ + { \ + qcoeff_ptr[scan[c]] = (int16_t) get_signed(br, val); \ INCREMENT_COUNT(token); \ c++; \ continue; \ } +#endif // CONFIG_CODE_NONZEROCOUNT #define ADJUST_COEF(prob, bits_count) \ do { \ @@ -99,6 +110,10 @@ static int decode_coefs(VP9D_COMP *dx, const MACROBLOCKD *xd, vp9_prob *prob; vp9_coeff_count *coef_counts; 
const int ref = xd->mode_info_context->mbmi.ref_frame != INTRA_FRAME; +#if CONFIG_CODE_NONZEROCOUNT + uint16_t nzc = 0; + uint16_t nzc_expected = xd->mode_info_context->mbmi.nzcs[block_idx]; +#endif if (xd->mode_info_context->mbmi.sb_type == BLOCK_SIZE_SB64X64) { aidx = vp9_block2above_sb64[txfm_size][block_idx]; @@ -170,12 +185,24 @@ static int decode_coefs(VP9D_COMP *dx, const MACROBLOCKD *xd, if (c >= seg_eob) break; +#if CONFIG_CODE_NONZEROCOUNT + if (nzc == nzc_expected) + break; +#endif prob = coef_probs[type][ref][get_coef_band(txfm_size, c)][pt]; +#if CONFIG_CODE_NONZEROCOUNT == 0 if (!vp9_read(br, prob[EOB_CONTEXT_NODE])) break; +#endif SKIP_START: if (c >= seg_eob) break; +#if CONFIG_CODE_NONZEROCOUNT + if (nzc == nzc_expected) + break; + // decode zero node only if there are zeros left + if (seg_eob - nzc_expected - c + nzc > 0) +#endif if (!vp9_read(br, prob[ZERO_CONTEXT_NODE])) { INCREMENT_COUNT(ZERO_TOKEN); ++c; @@ -242,8 +269,10 @@ SKIP_START: WRITE_COEF_CONTINUE(val, DCT_VAL_CATEGORY6); } +#if CONFIG_CODE_NONZEROCOUNT == 0 if (c < seg_eob) coef_counts[type][ref][get_coef_band(txfm_size, c)][pt][DCT_EOB_TOKEN]++; +#endif A0[aidx] = L0[lidx] = c > 0; if (txfm_size >= TX_8X8) { @@ -272,7 +301,6 @@ SKIP_START: } } } - return c; } diff --git a/vp9/encoder/vp9_bitstream.c b/vp9/encoder/vp9_bitstream.c index b05da870c..c6267f172 100644 --- a/vp9/encoder/vp9_bitstream.c +++ b/vp9/encoder/vp9_bitstream.c @@ -720,10 +720,9 @@ static void pack_inter_mode_mvs(VP9_COMP *cpi, MODE_INFO *m, // Distance of Mb to the various image edges. 
// These specified to 8th pel as they are always compared to MV // values that are in 1/8th pel units - xd->mb_to_left_edge = -((mb_col * 16) << 3); - xd->mb_to_top_edge = -((mb_row * 16)) << 3; - xd->mb_to_right_edge = ((pc->mb_cols - mb_size - mb_col) * 16) << 3; - xd->mb_to_bottom_edge = ((pc->mb_rows - mb_size - mb_row) * 16) << 3; + + set_mb_row(pc, xd, mb_row, mb_size); + set_mb_col(pc, xd, mb_col, mb_size); #ifdef ENTROPY_STATS active_section = 9; @@ -752,18 +751,7 @@ static void pack_inter_mode_mvs(VP9_COMP *cpi, MODE_INFO *m, } else if (vp9_segfeature_active(xd, segment_id, SEG_LVL_SKIP)) { skip_coeff = 1; } else { - const int nmbs = mb_size; - const int xmbs = MIN(nmbs, mb_cols_left); - const int ymbs = MIN(nmbs, mb_rows_left); - int x, y; - - skip_coeff = 1; - for (y = 0; y < ymbs; y++) { - for (x = 0; x < xmbs; x++) { - skip_coeff = skip_coeff && m[y * mis + x].mbmi.mb_skip_coeff; - } - } - + skip_coeff = m->mbmi.mb_skip_coeff; vp9_write(bc, skip_coeff, vp9_get_pred_prob(pc, xd, PRED_MBSKIP)); } @@ -967,7 +955,7 @@ static void pack_inter_mode_mvs(VP9_COMP *cpi, MODE_INFO *m, } static void write_mb_modes_kf(const VP9_COMP *cpi, - const MODE_INFO *m, + MODE_INFO *m, vp9_writer *bc, int mb_rows_left, int mb_cols_left) { const VP9_COMMON *const c = &cpi->common; @@ -986,18 +974,7 @@ static void write_mb_modes_kf(const VP9_COMP *cpi, } else if (vp9_segfeature_active(xd, segment_id, SEG_LVL_SKIP)) { skip_coeff = 1; } else { - const int nmbs = 1 << m->mbmi.sb_type; - const int xmbs = MIN(nmbs, mb_cols_left); - const int ymbs = MIN(nmbs, mb_rows_left); - int x, y; - - skip_coeff = 1; - for (y = 0; y < ymbs; y++) { - for (x = 0; x < xmbs; x++) { - skip_coeff = skip_coeff && m[y * mis + x].mbmi.mb_skip_coeff; - } - } - + skip_coeff = m->mbmi.mb_skip_coeff; vp9_write(bc, skip_coeff, vp9_get_pred_prob(c, xd, PRED_MBSKIP)); } @@ -1055,30 +1032,275 @@ static void write_mb_modes_kf(const VP9_COMP *cpi, } } +#if CONFIG_CODE_NONZEROCOUNT +static void 
write_nzc(VP9_COMMON *const cm, + uint16_t nzc, + int nzc_context, + TX_SIZE tx_size, + int ref, + int type, + vp9_writer* const bc) { /* Writes one nonzero count. codenzc(nzc) gives the token c, which is coded with the tree and probabilities of tx_size under [nzc_context][ref][type]. When extranzcbits(c) is non-zero, the remainder nzc - basenzcvalue(c) follows, most significant bit first, each bit coded with Pcat_nzc[nzc_context][c - 3][e]. The count increments are deliberately left commented out. */ + int c, e; + c = codenzc(nzc); + if (tx_size == TX_32X32) { + write_token(bc, vp9_nzc32x32_tree, + cm->fc.nzc_probs_32x32[nzc_context][ref][type], + vp9_nzc32x32_encodings + c); + // cm->fc.nzc_counts_32x32[nzc_context][ref][type][c]++; + } else if (tx_size == TX_16X16) { + write_token(bc, vp9_nzc16x16_tree, + cm->fc.nzc_probs_16x16[nzc_context][ref][type], + vp9_nzc16x16_encodings + c); + // cm->fc.nzc_counts_16x16[nzc_context][ref][type][c]++; + } else if (tx_size == TX_8X8) { + write_token(bc, vp9_nzc8x8_tree, + cm->fc.nzc_probs_8x8[nzc_context][ref][type], + vp9_nzc8x8_encodings + c); + // cm->fc.nzc_counts_8x8[nzc_context][ref][type][c]++; + } else if (tx_size == TX_4X4) { + write_token(bc, vp9_nzc4x4_tree, + cm->fc.nzc_probs_4x4[nzc_context][ref][type], + vp9_nzc4x4_encodings + c); + // cm->fc.nzc_counts_4x4[nzc_context][ref][type][c]++; + } else { + assert(0); + } + + if ((e = extranzcbits(c))) { + int x = nzc - basenzcvalue(c); + while (e--) + vp9_write(bc, (x >> e) & 1, Pcat_nzc[nzc_context][c - 3][e]); + } +} + +static void write_nzcs_sb64(VP9_COMP *cpi, + MACROBLOCKD *xd, + int mb_row, + int mb_col, + vp9_writer* const bc) { /* Writes the stored nonzero counts m->mbmi.nzcs[j] of a 64x64 superblock; nothing is written when mb_skip_coeff is set. Entries 0..255 use the y context and type 0, entries 256..383 the uv context and type 1; j advances by 64/16/4/1 for TX_32X32/16X16/8X8/4X4. */ + VP9_COMMON *const cm = &cpi->common; + MODE_INFO *m = xd->mode_info_context; + MB_MODE_INFO *const mi = &m->mbmi; + int j, nzc_context; + const int ref = m->mbmi.ref_frame != INTRA_FRAME; + + assert(mb_col == get_mb_col(xd)); + assert(mb_row == get_mb_row(xd)); + + if (mi->mb_skip_coeff) + return; + + switch (mi->txfm_size) { + case TX_32X32: + for (j = 0; j < 256; j += 64) { + nzc_context = vp9_get_nzc_context_y_sb64(cm, m, mb_row, mb_col, j); + write_nzc(cm, m->mbmi.nzcs[j], nzc_context, TX_32X32, ref, 0, bc); + } + for (j = 256; j < 384; j += 64) { + nzc_context = vp9_get_nzc_context_uv_sb64(cm, m, mb_row, mb_col, j); + write_nzc(cm, m->mbmi.nzcs[j], nzc_context, TX_32X32, ref, 1, bc); + } + 
break; + + case TX_16X16: + for (j = 0; j < 256; j += 16) { + nzc_context = vp9_get_nzc_context_y_sb64(cm, m, mb_row, mb_col, j); + write_nzc(cm, m->mbmi.nzcs[j], nzc_context, TX_16X16, ref, 0, bc); + } + for (j = 256; j < 384; j += 16) { + nzc_context = vp9_get_nzc_context_uv_sb64(cm, m, mb_row, mb_col, j); + write_nzc(cm, m->mbmi.nzcs[j], nzc_context, TX_16X16, ref, 1, bc); + } + break; + + case TX_8X8: + for (j = 0; j < 256; j += 4) { + nzc_context = vp9_get_nzc_context_y_sb64(cm, m, mb_row, mb_col, j); + write_nzc(cm, m->mbmi.nzcs[j], nzc_context, TX_8X8, ref, 0, bc); + } + for (j = 256; j < 384; j += 4) { + nzc_context = vp9_get_nzc_context_uv_sb64(cm, m, mb_row, mb_col, j); + write_nzc(cm, m->mbmi.nzcs[j], nzc_context, TX_8X8, ref, 1, bc); + } + break; + + case TX_4X4: + for (j = 0; j < 256; ++j) { + nzc_context = vp9_get_nzc_context_y_sb64(cm, m, mb_row, mb_col, j); + write_nzc(cm, m->mbmi.nzcs[j], nzc_context, TX_4X4, ref, 0, bc); + } + for (j = 256; j < 384; ++j) { + nzc_context = vp9_get_nzc_context_uv_sb64(cm, m, mb_row, mb_col, j); + write_nzc(cm, m->mbmi.nzcs[j], nzc_context, TX_4X4, ref, 1, bc); + } + break; + + default: + break; + } +} + +static void write_nzcs_sb32(VP9_COMP *cpi, + MACROBLOCKD *xd, + int mb_row, + int mb_col, + vp9_writer* const bc) { /* Writes the stored nonzero counts of a 32x32 superblock; nothing is written when mb_skip_coeff is set. Entries 0..63 use the y context, entries 64..95 the uv context. With TX_32X32 the uv entries are written as TX_16X16 (stride 16). */ + VP9_COMMON *const cm = &cpi->common; + MODE_INFO *m = xd->mode_info_context; + MB_MODE_INFO *const mi = &m->mbmi; + int j, nzc_context; + const int ref = m->mbmi.ref_frame != INTRA_FRAME; + + assert(mb_col == get_mb_col(xd)); + assert(mb_row == get_mb_row(xd)); + + if (mi->mb_skip_coeff) + return; + + switch (mi->txfm_size) { + case TX_32X32: + for (j = 0; j < 64; j += 64) { + nzc_context = vp9_get_nzc_context_y_sb32(cm, m, mb_row, mb_col, j); + write_nzc(cm, m->mbmi.nzcs[j], nzc_context, TX_32X32, ref, 0, bc); + } + for (j = 64; j < 96; j += 16) { + nzc_context = vp9_get_nzc_context_uv_sb32(cm, m, mb_row, mb_col, j); + write_nzc(cm, m->mbmi.nzcs[j], nzc_context, TX_16X16, ref, 1, bc); + } + break; + 
+ case TX_16X16: + for (j = 0; j < 64; j += 16) { + nzc_context = vp9_get_nzc_context_y_sb32(cm, m, mb_row, mb_col, j); + write_nzc(cm, m->mbmi.nzcs[j], nzc_context, TX_16X16, ref, 0, bc); + } + for (j = 64; j < 96; j += 16) { + nzc_context = vp9_get_nzc_context_uv_sb32(cm, m, mb_row, mb_col, j); + write_nzc(cm, m->mbmi.nzcs[j], nzc_context, TX_16X16, ref, 1, bc); + } + break; + + case TX_8X8: + for (j = 0; j < 64; j += 4) { + nzc_context = vp9_get_nzc_context_y_sb32(cm, m, mb_row, mb_col, j); + write_nzc(cm, m->mbmi.nzcs[j], nzc_context, TX_8X8, ref, 0, bc); + } + for (j = 64; j < 96; j += 4) { + nzc_context = vp9_get_nzc_context_uv_sb32(cm, m, mb_row, mb_col, j); + write_nzc(cm, m->mbmi.nzcs[j], nzc_context, TX_8X8, ref, 1, bc); + } + break; + + case TX_4X4: + for (j = 0; j < 64; ++j) { + nzc_context = vp9_get_nzc_context_y_sb32(cm, m, mb_row, mb_col, j); + write_nzc(cm, m->mbmi.nzcs[j], nzc_context, TX_4X4, ref, 0, bc); + } + for (j = 64; j < 96; ++j) { + nzc_context = vp9_get_nzc_context_uv_sb32(cm, m, mb_row, mb_col, j); + write_nzc(cm, m->mbmi.nzcs[j], nzc_context, TX_4X4, ref, 1, bc); + } + break; + + default: + break; + } +} + +static void write_nzcs_mb16(VP9_COMP *cpi, + MACROBLOCKD *xd, + int mb_row, + int mb_col, + vp9_writer* const bc) { /* Writes the stored nonzero counts of a single 16x16 macroblock; nothing is written when mb_skip_coeff is set. Entries 0..15 use the y context, entries 16..23 the uv context. With TX_16X16 the uv entries are written as TX_8X8; with TX_8X8 they are written per entry as TX_4X4 when the mode is I8X8_PRED or SPLITMV. */ + VP9_COMMON *const cm = &cpi->common; + MODE_INFO *m = xd->mode_info_context; + MB_MODE_INFO *const mi = &m->mbmi; + int j, nzc_context; + const int ref = m->mbmi.ref_frame != INTRA_FRAME; + + assert(mb_col == get_mb_col(xd)); + assert(mb_row == get_mb_row(xd)); + + if (mi->mb_skip_coeff) + return; + + switch (mi->txfm_size) { + case TX_16X16: + for (j = 0; j < 16; j += 16) { + nzc_context = vp9_get_nzc_context_y_mb16(cm, m, mb_row, mb_col, j); + write_nzc(cm, m->mbmi.nzcs[j], nzc_context, TX_16X16, ref, 0, bc); + } + for (j = 16; j < 24; j += 4) { + nzc_context = vp9_get_nzc_context_uv_mb16(cm, m, mb_row, mb_col, j); + write_nzc(cm, m->mbmi.nzcs[j], nzc_context, TX_8X8, ref, 1, bc); + } + break; + + case TX_8X8: + for 
(j = 0; j < 16; j += 4) { + nzc_context = vp9_get_nzc_context_y_mb16(cm, m, mb_row, mb_col, j); + write_nzc(cm, m->mbmi.nzcs[j], nzc_context, TX_8X8, ref, 0, bc); + } + if (mi->mode == I8X8_PRED || mi->mode == SPLITMV) { + for (j = 16; j < 24; ++j) { + nzc_context = vp9_get_nzc_context_uv_mb16(cm, m, mb_row, mb_col, j); + write_nzc(cm, m->mbmi.nzcs[j], nzc_context, TX_4X4, ref, 1, bc); + } + } else { + for (j = 16; j < 24; j += 4) { + nzc_context = vp9_get_nzc_context_uv_mb16(cm, m, mb_row, mb_col, j); + write_nzc(cm, m->mbmi.nzcs[j], nzc_context, TX_8X8, ref, 1, bc); + } + } + break; + + case TX_4X4: + for (j = 0; j < 16; ++j) { + nzc_context = vp9_get_nzc_context_y_mb16(cm, m, mb_row, mb_col, j); + write_nzc(cm, m->mbmi.nzcs[j], nzc_context, TX_4X4, ref, 0, bc); + } + for (j = 16; j < 24; ++j) { + nzc_context = vp9_get_nzc_context_uv_mb16(cm, m, mb_row, mb_col, j); + write_nzc(cm, m->mbmi.nzcs[j], nzc_context, TX_4X4, ref, 1, bc); + } + break; + + default: + break; + } +} +#endif + static void write_modes_b(VP9_COMP *cpi, MODE_INFO *m, vp9_writer *bc, TOKENEXTRA **tok, TOKENEXTRA *tok_end, int mb_row, int mb_col) { - VP9_COMMON *const c = &cpi->common; + VP9_COMMON *const cm = &cpi->common; MACROBLOCKD *const xd = &cpi->mb.e_mbd; xd->mode_info_context = m; - xd->left_available = mb_col > c->cur_tile_mb_col_start; - xd->right_available = - (mb_col + (1 << m->mbmi.sb_type)) < c->cur_tile_mb_col_end; - xd->up_available = mb_row > 0; - if (c->frame_type == KEY_FRAME) { + set_mb_row(&cpi->common, xd, mb_row, (1 << m->mbmi.sb_type)); + set_mb_col(&cpi->common, xd, mb_col, (1 << m->mbmi.sb_type)); + if (cm->frame_type == KEY_FRAME) { write_mb_modes_kf(cpi, m, bc, - c->mb_rows - mb_row, c->mb_cols - mb_col); + cm->mb_rows - mb_row, cm->mb_cols - mb_col); #ifdef ENTROPY_STATS active_section = 8; #endif } else { pack_inter_mode_mvs(cpi, m, bc, - c->mb_rows - mb_row, c->mb_cols - mb_col); + cm->mb_rows - mb_row, cm->mb_cols - mb_col); #ifdef ENTROPY_STATS active_section = 
1; #endif } +#if CONFIG_CODE_NONZEROCOUNT + if (m->mbmi.sb_type == BLOCK_SIZE_SB64X64) + write_nzcs_sb64(cpi, xd, mb_row, mb_col, bc); + else if (m->mbmi.sb_type == BLOCK_SIZE_SB32X32) + write_nzcs_sb32(cpi, xd, mb_row, mb_col, bc); + else + write_nzcs_mb16(cpi, xd, mb_row, mb_col, bc); +#endif assert(*tok < tok_end); pack_mb_tokens(bc, tok, tok_end); @@ -1232,6 +1454,157 @@ static void build_coeff_contexts(VP9_COMP *cpi) { cpi->frame_branch_ct_32x32, BLOCK_TYPES); } +#if CONFIG_CODE_NONZEROCOUNT +static void update_nzc_probs_common(VP9_COMP* cpi, + vp9_writer* const bc, + int block_size) { /* Encoder side of the NZC probability update for one transform size (block_size 32, 16 or 8; any other value is handled as 4x4). Pass 1 derives new per-node probabilities and branch counts from the nzc counts. Pass 2 accumulates the estimated savings of updating each node. Then a single bit says whether any update follows and, if so, every node gets an update flag coded with upd plus the new probability where the flag is set (which also replaces the stored probability). The 4-D tables are walked as flat arrays. */ + VP9_COMMON *cm = &cpi->common; + int c, r, b, t; + int update[2] = {0, 0}; + int savings = 0; + int tokens, nodes; + const vp9_tree_index *nzc_tree; + const struct vp9_token_struct *nzc_encodings; + vp9_prob *new_nzc_probs; + vp9_prob *old_nzc_probs; + unsigned int *nzc_counts; + unsigned int (*nzc_branch_ct)[2]; + vp9_prob upd; + + if (block_size == 32) { + tokens = NZC32X32_TOKENS; + nzc_tree = vp9_nzc32x32_tree; + nzc_encodings = vp9_nzc32x32_encodings; + old_nzc_probs = cm->fc.nzc_probs_32x32[0][0][0]; + new_nzc_probs = cpi->frame_nzc_probs_32x32[0][0][0]; + nzc_counts = cm->fc.nzc_counts_32x32[0][0][0]; + nzc_branch_ct = cpi->frame_nzc_branch_ct_32x32[0][0][0]; + upd = NZC_UPDATE_PROB_32X32; + } else if (block_size == 16) { + tokens = NZC16X16_TOKENS; + nzc_tree = vp9_nzc16x16_tree; + nzc_encodings = vp9_nzc16x16_encodings; + old_nzc_probs = cm->fc.nzc_probs_16x16[0][0][0]; + new_nzc_probs = cpi->frame_nzc_probs_16x16[0][0][0]; + nzc_counts = cm->fc.nzc_counts_16x16[0][0][0]; + nzc_branch_ct = cpi->frame_nzc_branch_ct_16x16[0][0][0]; + upd = NZC_UPDATE_PROB_16X16; + } else if (block_size == 8) { + tokens = NZC8X8_TOKENS; + nzc_tree = vp9_nzc8x8_tree; + nzc_encodings = vp9_nzc8x8_encodings; + old_nzc_probs = cm->fc.nzc_probs_8x8[0][0][0]; + new_nzc_probs = cpi->frame_nzc_probs_8x8[0][0][0]; + nzc_counts = cm->fc.nzc_counts_8x8[0][0][0]; + nzc_branch_ct = 
cpi->frame_nzc_branch_ct_8x8[0][0][0]; + upd = NZC_UPDATE_PROB_8X8; + } else { + nzc_tree = vp9_nzc4x4_tree; + nzc_encodings = vp9_nzc4x4_encodings; + tokens = NZC4X4_TOKENS; + old_nzc_probs = cm->fc.nzc_probs_4x4[0][0][0]; + new_nzc_probs = cpi->frame_nzc_probs_4x4[0][0][0]; + nzc_counts = cm->fc.nzc_counts_4x4[0][0][0]; + nzc_branch_ct = cpi->frame_nzc_branch_ct_4x4[0][0][0]; + upd = NZC_UPDATE_PROB_4X4; + } + nodes = tokens - 1; + // Get the new probabilities and the branch counts + for (c = 0; c < MAX_NZC_CONTEXTS; ++c) { + for (r = 0; r < REF_TYPES; ++r) { + for (b = 0; b < BLOCK_TYPES; ++b) { + int offset = c * REF_TYPES * BLOCK_TYPES + r * BLOCK_TYPES + b; + int offset_nodes = offset * nodes; + int offset_tokens = offset * tokens; + vp9_tree_probs_from_distribution(tokens, + nzc_encodings, nzc_tree, + new_nzc_probs + offset_nodes, + nzc_branch_ct + offset_nodes, + nzc_counts + offset_tokens); + } + } + } + + for (c = 0; c < MAX_NZC_CONTEXTS; ++c) { + for (r = 0; r < REF_TYPES; ++r) { + for (b = 0; b < BLOCK_TYPES; ++b) { + int offset = c * REF_TYPES * BLOCK_TYPES + r * BLOCK_TYPES + b; + int offset_nodes = offset * nodes; + for (t = 0; t < nodes; ++t) { + vp9_prob newp = new_nzc_probs[offset_nodes + t]; + vp9_prob oldp = old_nzc_probs[offset_nodes + t]; + int s, u = 0; /* the savings must be computed from node t's own branch counts, i.e. the same [offset_nodes + t] slot as the probabilities above */ +#if defined(SEARCH_NEWP) + s = prob_diff_update_savings_search(nzc_branch_ct[offset_nodes + t], + oldp, &newp, upd); + if (s > 0 && newp != oldp) + u = 1; + if (u) + savings += s - (int)(vp9_cost_zero(upd)); + else + savings -= (int)(vp9_cost_zero(upd)); +#else + s = prob_update_savings(nzc_branch_ct[offset_nodes + t], + oldp, newp, upd); + if (s > 0) + u = 1; + if (u) + savings += s; +#endif + update[u]++; + } + } + } + } + if (update[1] == 0 || savings < 0) { + vp9_write_bit(bc, 0); + } else { + vp9_write_bit(bc, 1); + for (c = 0; c < MAX_NZC_CONTEXTS; ++c) { + for (r = 0; r < REF_TYPES; ++r) { + for (b = 0; b < BLOCK_TYPES; ++b) { + int offset = c * REF_TYPES * BLOCK_TYPES + r * BLOCK_TYPES + b; + 
int offset_nodes = offset * nodes; + for (t = 0; t < nodes; ++t) { + vp9_prob newp = new_nzc_probs[offset_nodes + t]; + vp9_prob *oldp = &old_nzc_probs[offset_nodes + t]; + int s, u = 0; /* same per-node branch counts as in the estimation pass, so both passes reach the same decision */ +#if defined(SEARCH_NEWP) + s = prob_diff_update_savings_search(nzc_branch_ct[offset_nodes + t], + *oldp, &newp, upd); + if (s > 0 && newp != *oldp) + u = 1; +#else + s = prob_update_savings(nzc_branch_ct[offset_nodes + t], + *oldp, newp, upd); + if (s > 0) + u = 1; +#endif + vp9_write(bc, u, upd); + if (u) { + /* send/use new probability */ + write_prob_diff_update(bc, newp, *oldp); + *oldp = newp; + } + } + } + } + } + } +} + +static void update_nzc_probs(VP9_COMP* cpi, + vp9_writer* const bc) { /* Writes the 4x4 updates unconditionally; 8x8, 16x16 and 32x32 only when txfm_mode is beyond ONLY_4X4, ALLOW_8X8 and ALLOW_16X16 respectively. */ + update_nzc_probs_common(cpi, bc, 4); + if (cpi->common.txfm_mode != ONLY_4X4) + update_nzc_probs_common(cpi, bc, 8); + if (cpi->common.txfm_mode > ALLOW_8X8) + update_nzc_probs_common(cpi, bc, 16); + if (cpi->common.txfm_mode > ALLOW_16X16) + update_nzc_probs_common(cpi, bc, 32); +} +#endif // CONFIG_CODE_NONZEROCOUNT + static void update_coef_probs_common(vp9_writer* const bc, #ifdef ENTROPY_STATS VP9_COMP *cpi, @@ -1253,7 +1626,7 @@ static void update_coef_probs_common(vp9_writer* const bc, for (k = 0; k < COEF_BANDS; ++k) { int prev_coef_savings[ENTROPY_NODES] = {0}; for (l = 0; l < PREV_COEF_CONTEXTS; ++l) { - for (t = 0; t < ENTROPY_NODES; ++t) { + for (t = CONFIG_CODE_NONZEROCOUNT; t < ENTROPY_NODES; ++t) { vp9_prob newp = new_frame_coef_probs[i][j][k][l][t]; const vp9_prob oldp = old_frame_coef_probs[i][j][k][l][t]; const vp9_prob upd = COEF_UPDATE_PROB; @@ -1299,7 +1672,7 @@ static void update_coef_probs_common(vp9_writer* const bc, int prev_coef_savings[ENTROPY_NODES] = {0}; for (l = 0; l < PREV_COEF_CONTEXTS; ++l) { // calc probs and branch cts for this frame only - for (t = 0; t < ENTROPY_NODES; ++t) { + for (t = CONFIG_CODE_NONZEROCOUNT; t < ENTROPY_NODES; ++t) { vp9_prob newp = new_frame_coef_probs[i][j][k][l][t]; vp9_prob *oldp = old_frame_coef_probs[i][j][k][l] + t; const 
vp9_prob upd = COEF_UPDATE_PROB; @@ -1900,6 +2273,24 @@ void vp9_pack_bitstream(VP9_COMP *cpi, unsigned char *dest, cpi->common.fc.coef_probs_16x16); vp9_copy(cpi->common.fc.pre_coef_probs_32x32, cpi->common.fc.coef_probs_32x32); +#if CONFIG_CODE_NONZEROCOUNT + vp9_copy(cpi->common.fc.pre_nzc_probs_4x4, + cpi->common.fc.nzc_probs_4x4); + vp9_copy(cpi->common.fc.pre_nzc_probs_8x8, + cpi->common.fc.nzc_probs_8x8); + vp9_copy(cpi->common.fc.pre_nzc_probs_16x16, + cpi->common.fc.nzc_probs_16x16); + vp9_copy(cpi->common.fc.pre_nzc_probs_32x32, + cpi->common.fc.nzc_probs_32x32); + // NOTE that if the counts are reset, we also need to uncomment + // the count updates in the write_nzc function + /* + vp9_zero(cpi->common.fc.nzc_counts_4x4); + vp9_zero(cpi->common.fc.nzc_counts_8x8); + vp9_zero(cpi->common.fc.nzc_counts_16x16); + vp9_zero(cpi->common.fc.nzc_counts_32x32); + */ +#endif vp9_copy(cpi->common.fc.pre_sb_ymode_prob, cpi->common.fc.sb_ymode_prob); vp9_copy(cpi->common.fc.pre_ymode_prob, cpi->common.fc.ymode_prob); vp9_copy(cpi->common.fc.pre_uv_mode_prob, cpi->common.fc.uv_mode_prob); @@ -1916,6 +2307,9 @@ void vp9_pack_bitstream(VP9_COMP *cpi, unsigned char *dest, vp9_zero(cpi->common.fc.mv_ref_ct) update_coef_probs(cpi, &header_bc); +#if CONFIG_CODE_NONZEROCOUNT + update_nzc_probs(cpi, &header_bc); +#endif #ifdef ENTROPY_STATS active_section = 2; @@ -1927,8 +2321,9 @@ void vp9_pack_bitstream(VP9_COMP *cpi, unsigned char *dest, int k; vp9_update_skip_probs(cpi); - for (k = 0; k < MBSKIP_CONTEXTS; ++k) + for (k = 0; k < MBSKIP_CONTEXTS; ++k) { vp9_write_literal(&header_bc, pc->mbskip_pred_probs[k], 8); + } } if (pc->frame_type == KEY_FRAME) { diff --git a/vp9/encoder/vp9_block.h b/vp9/encoder/vp9_block.h index 560c37171..439006156 100644 --- a/vp9/encoder/vp9_block.h +++ b/vp9/encoder/vp9_block.h @@ -151,6 +151,12 @@ struct macroblock { unsigned char *active_ptr; vp9_coeff_count token_costs[TX_SIZE_MAX_SB][BLOCK_TYPES]; +#if CONFIG_CODE_NONZEROCOUNT + unsigned int 
nzc_costs_4x4[MAX_NZC_CONTEXTS][REF_TYPES][BLOCK_TYPES][17]; + unsigned int nzc_costs_8x8[MAX_NZC_CONTEXTS][REF_TYPES][BLOCK_TYPES][65]; + unsigned int nzc_costs_16x16[MAX_NZC_CONTEXTS][REF_TYPES][BLOCK_TYPES][257]; + unsigned int nzc_costs_32x32[MAX_NZC_CONTEXTS][REF_TYPES][BLOCK_TYPES][1025]; +#endif int optimize; diff --git a/vp9/encoder/vp9_encodeframe.c b/vp9/encoder/vp9_encodeframe.c index a4dbdc5a8..f2be96dd7 100644 --- a/vp9/encoder/vp9_encodeframe.c +++ b/vp9/encoder/vp9_encodeframe.c @@ -630,10 +630,6 @@ static void set_offsets(VP9_COMP *cpi, const int idx_map = mb_row * cm->mb_cols + mb_col; const int idx_str = xd->mode_info_stride * mb_row + mb_col; -#ifdef ENC_DEBUG - enc_debug = (cpi->common.current_video_frame == 2 && - mb_row == 4 && mb_col == 5); -#endif // entropy context structures xd->above_context = cm->above_context + mb_col; xd->left_context = cm->left_context + (mb_row & 3); @@ -668,15 +664,8 @@ static void set_offsets(VP9_COMP *cpi, // Set up distance of MB to edge of frame in 1/8th pel units block_size >>= 4; // in macroblock units assert(!(mb_col & (block_size - 1)) && !(mb_row & (block_size - 1))); - xd->mb_to_top_edge = -((mb_row * 16) << 3); - xd->mb_to_left_edge = -((mb_col * 16) << 3); - xd->mb_to_bottom_edge = ((cm->mb_rows - block_size - mb_row) * 16) << 3; - xd->mb_to_right_edge = ((cm->mb_cols - block_size - mb_col) * 16) << 3; - - // Are edges available for intra prediction? 
- xd->up_available = (mb_row != 0); - xd->left_available = (mb_col > cm->cur_tile_mb_col_start); - xd->right_available = (mb_col + block_size < cm->cur_tile_mb_col_end); + set_mb_row(cm, xd, mb_row, block_size); + set_mb_col(cm, xd, mb_col, block_size); /* set up source buffers */ setup_pred_block(&x->src, cpi->Source, mb_row, mb_col, NULL, NULL); @@ -891,7 +880,7 @@ static void pick_sb64_modes(VP9_COMP *cpi, } } -static void update_stats(VP9_COMP *cpi) { +static void update_stats(VP9_COMP *cpi, int mb_row, int mb_col) { VP9_COMMON *const cm = &cpi->common; MACROBLOCK *const x = &cpi->mb; MACROBLOCKD *const xd = &x->e_mbd; @@ -945,6 +934,9 @@ static void update_stats(VP9_COMP *cpi) { if ((mbmi->mode == ZEROMV) && (mbmi->ref_frame == LAST_FRAME)) cpi->inter_zz_count++; } +#if CONFIG_CODE_NONZEROCOUNT + vp9_update_nzc_counts(&cpi->common, xd, mb_row, mb_col); +#endif } static void encode_sb(VP9_COMP *cpi, @@ -963,8 +955,9 @@ static void encode_sb(VP9_COMP *cpi, encode_superblock32(cpi, tp, output_enabled, mb_row, mb_col); - if (output_enabled) - update_stats(cpi); + if (output_enabled) { + update_stats(cpi, mb_row, mb_col); + } if (output_enabled) { (*tp)->Token = EOSB_TOKEN; @@ -992,12 +985,13 @@ static void encode_sb(VP9_COMP *cpi, encode_macroblock(cpi, tp, output_enabled, mb_row + y_idx, mb_col + x_idx); - if (output_enabled) - update_stats(cpi); + if (output_enabled) { + update_stats(cpi, mb_row + y_idx, mb_col + x_idx); + } if (output_enabled) { (*tp)->Token = EOSB_TOKEN; - (*tp)++; + (*tp)++; if (mb_row + y_idx < cm->mb_rows) cpi->tplist[mb_row + y_idx].stop = *tp; } @@ -1029,7 +1023,7 @@ static void encode_sb64(VP9_COMP *cpi, update_state(cpi, &x->sb64_context, 64, 1); encode_superblock64(cpi, tp, 1, mb_row, mb_col); - update_stats(cpi); + update_stats(cpi, mb_row, mb_col); (*tp)->Token = EOSB_TOKEN; (*tp)++; @@ -1286,6 +1280,12 @@ static void encode_frame_internal(VP9_COMP *cpi) { vp9_zero(cpi->coef_counts_8x8); vp9_zero(cpi->coef_counts_16x16); 
vp9_zero(cpi->coef_counts_32x32); +#if CONFIG_CODE_NONZEROCOUNT + vp9_zero(cm->fc.nzc_counts_4x4); + vp9_zero(cm->fc.nzc_counts_8x8); + vp9_zero(cm->fc.nzc_counts_16x16); + vp9_zero(cm->fc.nzc_counts_32x32); +#endif #if CONFIG_NEW_MVREF vp9_zero(cpi->mb_mv_ref_count); #endif @@ -1327,30 +1327,34 @@ static void encode_frame_internal(VP9_COMP *cpi) { { // Take tiles into account and give start/end MB - int tile_col; + int tile_col, tile_row; TOKENEXTRA *tp = cpi->tok; - for (tile_col = 0; tile_col < cm->tile_columns; tile_col++) { - TOKENEXTRA *tp_old = tp; - // For each row of SBs in the frame - vp9_get_tile_col_offsets(cm, tile_col); - for (mb_row = 0; mb_row < cm->mb_rows; mb_row += 4) { - encode_sb_row(cpi, mb_row, &tp, &totalrate); + for (tile_row = 0; tile_row < cm->tile_rows; tile_row++) { + vp9_get_tile_row_offsets(cm, tile_row); + + for (tile_col = 0; tile_col < cm->tile_columns; tile_col++) { + TOKENEXTRA *tp_old = tp; + + // For each row of SBs in the frame + vp9_get_tile_col_offsets(cm, tile_col); + for (mb_row = cm->cur_tile_mb_row_start; + mb_row < cm->cur_tile_mb_row_end; mb_row += 4) { + encode_sb_row(cpi, mb_row, &tp, &totalrate); + } + cpi->tok_count[tile_col] = (unsigned int)(tp - tp_old); } - cpi->tok_count[tile_col] = (unsigned int)(tp - tp_old); } } vpx_usec_timer_mark(&emr_timer); cpi->time_encode_mb_row += vpx_usec_timer_elapsed(&emr_timer); - } // 256 rate units to the bit, // projected_frame_size in units of BYTES cpi->projected_frame_size = totalrate >> 8; - #if 0 // Keep record of the total distortion this time around for future use cpi->last_frame_distortion = cpi->frame_distortion; @@ -1930,6 +1934,135 @@ static void update_sb64_skip_coeff_state(VP9_COMP *cpi, } } +#if CONFIG_CODE_NONZEROCOUNT +static void gather_nzcs_mb16(VP9_COMMON *const cm, + MACROBLOCKD *xd) { + int i; + vpx_memset(xd->mode_info_context->mbmi.nzcs, 0, + 384 * sizeof(xd->mode_info_context->mbmi.nzcs[0])); + switch (xd->mode_info_context->mbmi.txfm_size) { + case 
TX_4X4: + for (i = 0; i < 24; ++i) { + xd->mode_info_context->mbmi.nzcs[i] = xd->nzcs[i]; + } + break; + + case TX_8X8: + for (i = 0; i < 16; i += 4) { + xd->mode_info_context->mbmi.nzcs[i] = xd->nzcs[i]; + } + if (xd->mode_info_context->mbmi.mode == I8X8_PRED || + xd->mode_info_context->mbmi.mode == SPLITMV) { + for (i = 16; i < 24; ++i) { + xd->mode_info_context->mbmi.nzcs[i] = xd->nzcs[i]; + } + } else { + for (i = 16; i < 24; i += 4) { + xd->mode_info_context->mbmi.nzcs[i] = xd->nzcs[i]; + } + } + break; + + case TX_16X16: + xd->mode_info_context->mbmi.nzcs[0] = xd->nzcs[0]; + for (i = 16; i < 24; i += 4) { + xd->mode_info_context->mbmi.nzcs[i] = xd->nzcs[i]; + } + break; + + default: + break; + } +} + +static void gather_nzcs_sb32(VP9_COMMON *const cm, + MACROBLOCKD *xd) { + int i, j; + MODE_INFO *m = xd->mode_info_context; + int mis = cm->mode_info_stride; + vpx_memset(m->mbmi.nzcs, 0, + 384 * sizeof(xd->mode_info_context->mbmi.nzcs[0])); + switch (xd->mode_info_context->mbmi.txfm_size) { + case TX_4X4: + for (i = 0; i < 96; ++i) { + xd->mode_info_context->mbmi.nzcs[i] = xd->nzcs[i]; + } + break; + + case TX_8X8: + for (i = 0; i < 96; i += 4) { + xd->mode_info_context->mbmi.nzcs[i] = xd->nzcs[i]; + } + break; + + case TX_16X16: + for (i = 0; i < 96; i += 16) { + xd->mode_info_context->mbmi.nzcs[i] = xd->nzcs[i]; + } + break; + + case TX_32X32: + xd->mode_info_context->mbmi.nzcs[0] = xd->nzcs[0]; + for (i = 64; i < 96; i += 16) { + xd->mode_info_context->mbmi.nzcs[i] = xd->nzcs[i]; + } + break; + + default: + break; + } + for (i = 0; i < 2; ++i) + for (j = 0; j < 2; ++j) { + if (i == 0 && j == 0) continue; + vpx_memcpy((m + j + mis * i)->mbmi.nzcs, m->mbmi.nzcs, + 384 * sizeof(m->mbmi.nzcs[0])); + } +} + +static void gather_nzcs_sb64(VP9_COMMON *const cm, + MACROBLOCKD *xd) { + int i, j; + MODE_INFO *m = xd->mode_info_context; + int mis = cm->mode_info_stride; + vpx_memset(xd->mode_info_context->mbmi.nzcs, 0, + 384 * 
sizeof(xd->mode_info_context->mbmi.nzcs[0])); + switch (xd->mode_info_context->mbmi.txfm_size) { + case TX_4X4: + for (i = 0; i < 384; ++i) { + xd->mode_info_context->mbmi.nzcs[i] = xd->nzcs[i]; + } + break; + + case TX_8X8: + for (i = 0; i < 384; i += 4) { + xd->mode_info_context->mbmi.nzcs[i] = xd->nzcs[i]; + } + break; + + case TX_16X16: + for (i = 0; i < 384; i += 16) { + xd->mode_info_context->mbmi.nzcs[i] = xd->nzcs[i]; + } + break; + + case TX_32X32: + for (i = 0; i < 384; i += 64) { + xd->mode_info_context->mbmi.nzcs[i] = xd->nzcs[i]; + } + break; + + default: + break; + } + for (i = 0; i < 4; ++i) + for (j = 0; j < 4; ++j) { + if (i == 0 && j == 0) continue; + vpx_memcpy((m + j + mis * i)->mbmi.nzcs, m->mbmi.nzcs, + 384 * sizeof(m->mbmi.nzcs[0])); + } +} +#endif + static void encode_macroblock(VP9_COMP *cpi, TOKENEXTRA **t, int output_enabled, int mb_row, int mb_col) { @@ -1944,8 +2077,8 @@ static void encode_macroblock(VP9_COMP *cpi, TOKENEXTRA **t, assert(!xd->mode_info_context->mbmi.sb_type); #ifdef ENC_DEBUG - enc_debug = (cpi->common.current_video_frame == 2 && - mb_row == 5 && mb_col == 18); + enc_debug = (cpi->common.current_video_frame == 1 && + mb_row == 0 && mb_col == 0 && output_enabled); if (enc_debug) printf("Encode MB %d %d output %d\n", mb_row, mb_col, output_enabled); #endif @@ -1997,14 +2130,14 @@ static void encode_macroblock(VP9_COMP *cpi, TOKENEXTRA **t, } #endif if (mbmi->mode == B_PRED) { - vp9_encode_intra16x16mbuv(x); + vp9_encode_intra16x16mbuv(cm, x); vp9_encode_intra4x4mby(x); } else if (mbmi->mode == I8X8_PRED) { vp9_encode_intra8x8mby(x); vp9_encode_intra8x8mbuv(x); } else { - vp9_encode_intra16x16mbuv(x); - vp9_encode_intra16x16mby(x); + vp9_encode_intra16x16mbuv(cm, x); + vp9_encode_intra16x16mby(cm, x); } if (output_enabled) @@ -2051,7 +2184,7 @@ static void encode_macroblock(VP9_COMP *cpi, TOKENEXTRA **t, } if (!x->skip) { - vp9_encode_inter16x16(x, mb_row, mb_col); + vp9_encode_inter16x16(cm, x, mb_row, mb_col); // Clear 
mb_skip_coeff if mb_no_coeff_skip is not set if (!cpi->common.mb_no_coeff_skip) @@ -2079,12 +2212,12 @@ static void encode_macroblock(VP9_COMP *cpi, TOKENEXTRA **t, } if (!x->skip) { -#if 0 // def ENC_DEBUG +#ifdef ENC_DEBUG if (enc_debug) { int i, j; printf("\n"); printf("qcoeff\n"); - for (i = 0; i < 400; i++) { + for (i = 0; i < 384; i++) { printf("%3d ", xd->qcoeff[i]); if (i % 16 == 15) printf("\n"); } @@ -2131,6 +2264,9 @@ static void encode_macroblock(VP9_COMP *cpi, TOKENEXTRA **t, } #endif +#if CONFIG_CODE_NONZEROCOUNT + gather_nzcs_mb16(cm, xd); +#endif vp9_tokenize_mb(cpi, xd, t, !output_enabled); } else { @@ -2197,6 +2333,12 @@ static void encode_superblock32(VP9_COMP *cpi, TOKENEXTRA **t, unsigned int segment_id = mi->mbmi.segment_id; const int mis = cm->mode_info_stride; +#ifdef ENC_DEBUG + enc_debug = (cpi->common.current_video_frame == 1 && + mb_row == 0 && mb_col == 0 && output_enabled); + if (enc_debug) + printf("Encode SB32 %d %d output %d\n", mb_row, mb_col, output_enabled); +#endif if (cm->frame_type == KEY_FRAME) { if (cpi->oxcf.tuning == VP8_TUNE_SSIM) { adjust_act_zbin(cpi, x); @@ -2294,8 +2436,8 @@ static void encode_superblock32(VP9_COMP *cpi, TOKENEXTRA **t, vp9_quantize_sby_32x32(x); vp9_quantize_sbuv_16x16(x); if (x->optimize) { - vp9_optimize_sby_32x32(x); - vp9_optimize_sbuv_16x16(x); + vp9_optimize_sby_32x32(cm, x); + vp9_optimize_sbuv_16x16(cm, x); } vp9_inverse_transform_sby_32x32(xd); vp9_inverse_transform_sbuv_16x16(xd); @@ -2306,8 +2448,8 @@ static void encode_superblock32(VP9_COMP *cpi, TOKENEXTRA **t, vp9_quantize_sby_16x16(x); vp9_quantize_sbuv_16x16(x); if (x->optimize) { - vp9_optimize_sby_16x16(x); - vp9_optimize_sbuv_16x16(x); + vp9_optimize_sby_16x16(cm, x); + vp9_optimize_sbuv_16x16(cm, x); } vp9_inverse_transform_sby_16x16(xd); vp9_inverse_transform_sbuv_16x16(xd); @@ -2318,8 +2460,8 @@ static void encode_superblock32(VP9_COMP *cpi, TOKENEXTRA **t, vp9_quantize_sby_8x8(x); vp9_quantize_sbuv_8x8(x); if (x->optimize) { - 
vp9_optimize_sby_8x8(x); - vp9_optimize_sbuv_8x8(x); + vp9_optimize_sby_8x8(cm, x); + vp9_optimize_sbuv_8x8(cm, x); } vp9_inverse_transform_sby_8x8(xd); vp9_inverse_transform_sbuv_8x8(xd); @@ -2330,8 +2472,8 @@ static void encode_superblock32(VP9_COMP *cpi, TOKENEXTRA **t, vp9_quantize_sby_4x4(x); vp9_quantize_sbuv_4x4(x); if (x->optimize) { - vp9_optimize_sby_4x4(x); - vp9_optimize_sbuv_4x4(x); + vp9_optimize_sby_4x4(cm, x); + vp9_optimize_sbuv_4x4(cm, x); } vp9_inverse_transform_sby_4x4(xd); vp9_inverse_transform_sbuv_4x4(xd); @@ -2340,6 +2482,9 @@ static void encode_superblock32(VP9_COMP *cpi, TOKENEXTRA **t, } vp9_recon_sby_s_c(xd, dst); vp9_recon_sbuv_s_c(xd, udst, vdst); +#if CONFIG_CODE_NONZEROCOUNT + gather_nzcs_sb32(cm, xd); +#endif vp9_tokenize_sb(cpi, xd, t, !output_enabled); } else { @@ -2407,6 +2552,12 @@ static void encode_superblock64(VP9_COMP *cpi, TOKENEXTRA **t, unsigned int segment_id = mi->mbmi.segment_id; const int mis = cm->mode_info_stride; +#ifdef ENC_DEBUG + enc_debug = (cpi->common.current_video_frame == 1 && + mb_row == 0 && mb_col == 0 && output_enabled); + if (enc_debug) + printf("Encode SB64 %d %d output %d\n", mb_row, mb_col, output_enabled); +#endif if (cm->frame_type == KEY_FRAME) { if (cpi->oxcf.tuning == VP8_TUNE_SSIM) { adjust_act_zbin(cpi, x); @@ -2502,8 +2653,8 @@ static void encode_superblock64(VP9_COMP *cpi, TOKENEXTRA **t, vp9_quantize_sb64y_32x32(x); vp9_quantize_sb64uv_32x32(x); if (x->optimize) { - vp9_optimize_sb64y_32x32(x); - vp9_optimize_sb64uv_32x32(x); + vp9_optimize_sb64y_32x32(cm, x); + vp9_optimize_sb64uv_32x32(cm, x); } vp9_inverse_transform_sb64y_32x32(xd); vp9_inverse_transform_sb64uv_32x32(xd); @@ -2514,8 +2665,8 @@ static void encode_superblock64(VP9_COMP *cpi, TOKENEXTRA **t, vp9_quantize_sb64y_16x16(x); vp9_quantize_sb64uv_16x16(x); if (x->optimize) { - vp9_optimize_sb64y_16x16(x); - vp9_optimize_sb64uv_16x16(x); + vp9_optimize_sb64y_16x16(cm, x); + vp9_optimize_sb64uv_16x16(cm, x); } 
vp9_inverse_transform_sb64y_16x16(xd); vp9_inverse_transform_sb64uv_16x16(xd); @@ -2526,8 +2677,8 @@ static void encode_superblock64(VP9_COMP *cpi, TOKENEXTRA **t, vp9_quantize_sb64y_8x8(x); vp9_quantize_sb64uv_8x8(x); if (x->optimize) { - vp9_optimize_sb64y_8x8(x); - vp9_optimize_sb64uv_8x8(x); + vp9_optimize_sb64y_8x8(cm, x); + vp9_optimize_sb64uv_8x8(cm, x); } vp9_inverse_transform_sb64y_8x8(xd); vp9_inverse_transform_sb64uv_8x8(xd); @@ -2538,8 +2689,8 @@ static void encode_superblock64(VP9_COMP *cpi, TOKENEXTRA **t, vp9_quantize_sb64y_4x4(x); vp9_quantize_sb64uv_4x4(x); if (x->optimize) { - vp9_optimize_sb64y_4x4(x); - vp9_optimize_sb64uv_4x4(x); + vp9_optimize_sb64y_4x4(cm, x); + vp9_optimize_sb64uv_4x4(cm, x); } vp9_inverse_transform_sb64y_4x4(xd); vp9_inverse_transform_sb64uv_4x4(xd); @@ -2548,7 +2699,9 @@ static void encode_superblock64(VP9_COMP *cpi, TOKENEXTRA **t, } vp9_recon_sb64y_s_c(xd, dst); vp9_recon_sb64uv_s_c(&x->e_mbd, udst, vdst); - +#if CONFIG_CODE_NONZEROCOUNT + gather_nzcs_sb64(cm, &x->e_mbd); +#endif vp9_tokenize_sb64(cpi, &x->e_mbd, t, !output_enabled); } else { // FIXME(rbultje): not tile-aware (mi - 1) diff --git a/vp9/encoder/vp9_encodeintra.c b/vp9/encoder/vp9_encodeintra.c index 75c8ea8f3..d3b595bd8 100644 --- a/vp9/encoder/vp9_encodeintra.c +++ b/vp9/encoder/vp9_encodeintra.c @@ -25,7 +25,7 @@ int vp9_encode_intra(VP9_COMP *cpi, MACROBLOCK *x, int use_16x16_pred) { mbmi->uv_mode = DC_PRED; mbmi->ref_frame = INTRA_FRAME; - vp9_encode_intra16x16mby(x); + vp9_encode_intra16x16mby(&cpi->common, x); } else { int i; @@ -72,7 +72,7 @@ void vp9_encode_intra4x4mby(MACROBLOCK *mb) { vp9_encode_intra4x4block(mb, i); } -void vp9_encode_intra16x16mby(MACROBLOCK *x) { +void vp9_encode_intra16x16mby(VP9_COMMON *const cm, MACROBLOCK *x) { MACROBLOCKD *xd = &x->e_mbd; BLOCK *b = &x->block[0]; TX_SIZE tx_size = xd->mode_info_context->mbmi.txfm_size; @@ -86,21 +86,21 @@ void vp9_encode_intra16x16mby(MACROBLOCK *x) { vp9_transform_mby_16x16(x); 
vp9_quantize_mby_16x16(x); if (x->optimize) - vp9_optimize_mby_16x16(x); + vp9_optimize_mby_16x16(cm, x); vp9_inverse_transform_mby_16x16(xd); break; case TX_8X8: vp9_transform_mby_8x8(x); vp9_quantize_mby_8x8(x); if (x->optimize) - vp9_optimize_mby_8x8(x); + vp9_optimize_mby_8x8(cm, x); vp9_inverse_transform_mby_8x8(xd); break; default: vp9_transform_mby_4x4(x); vp9_quantize_mby_4x4(x); if (x->optimize) - vp9_optimize_mby_4x4(x); + vp9_optimize_mby_4x4(cm, x); vp9_inverse_transform_mby_4x4(xd); break; } @@ -108,7 +108,7 @@ void vp9_encode_intra16x16mby(MACROBLOCK *x) { vp9_recon_mby(xd); } -void vp9_encode_intra16x16mbuv(MACROBLOCK *x) { +void vp9_encode_intra16x16mbuv(VP9_COMMON *const cm, MACROBLOCK *x) { MACROBLOCKD *xd = &x->e_mbd; TX_SIZE tx_size = xd->mode_info_context->mbmi.txfm_size; @@ -122,14 +122,14 @@ void vp9_encode_intra16x16mbuv(MACROBLOCK *x) { vp9_transform_mbuv_4x4(x); vp9_quantize_mbuv_4x4(x); if (x->optimize) - vp9_optimize_mbuv_4x4(x); + vp9_optimize_mbuv_4x4(cm, x); vp9_inverse_transform_mbuv_4x4(xd); break; default: // 16x16 or 8x8 vp9_transform_mbuv_8x8(x); vp9_quantize_mbuv_8x8(x); if (x->optimize) - vp9_optimize_mbuv_8x8(x); + vp9_optimize_mbuv_8x8(cm, x); vp9_inverse_transform_mbuv_8x8(xd); break; } diff --git a/vp9/encoder/vp9_encodeintra.h b/vp9/encoder/vp9_encodeintra.h index b017673ee..0b19b5652 100644 --- a/vp9/encoder/vp9_encodeintra.h +++ b/vp9/encoder/vp9_encodeintra.h @@ -14,8 +14,8 @@ #include "vp9/encoder/vp9_onyx_int.h" int vp9_encode_intra(VP9_COMP *cpi, MACROBLOCK *x, int use_16x16_pred); -void vp9_encode_intra16x16mby(MACROBLOCK *x); -void vp9_encode_intra16x16mbuv(MACROBLOCK *x); +void vp9_encode_intra16x16mby(VP9_COMMON *const cm, MACROBLOCK *x); +void vp9_encode_intra16x16mbuv(VP9_COMMON *const cm, MACROBLOCK *x); void vp9_encode_intra4x4mby(MACROBLOCK *mb); void vp9_encode_intra4x4block(MACROBLOCK *x, int ib); void vp9_encode_intra8x8mby(MACROBLOCK *x); diff --git a/vp9/encoder/vp9_encodemb.c 
b/vp9/encoder/vp9_encodemb.c index b2ee800cd..c0386459d 100644 --- a/vp9/encoder/vp9_encodemb.c +++ b/vp9/encoder/vp9_encodemb.c @@ -491,7 +491,8 @@ static int trellis_get_coeff_context(int token) { return vp9_get_coef_context(&recent_energy, token); } -static void optimize_b(MACROBLOCK *mb, int ib, PLANE_TYPE type, +static void optimize_b(VP9_COMMON *const cm, + MACROBLOCK *mb, int ib, PLANE_TYPE type, const int16_t *dequant_ptr, ENTROPY_CONTEXT *a, ENTROPY_CONTEXT *l, int tx_size) { @@ -512,26 +513,73 @@ static void optimize_b(MACROBLOCK *mb, int ib, PLANE_TYPE type, int default_eob; int const *scan; const int mul = 1 + (tx_size == TX_32X32); + TX_TYPE tx_type; +#if CONFIG_CODE_NONZEROCOUNT + // TODO(debargha): the dynamic programming approach used in this function + // is not compatible with the true rate cost when nzcs are used. Note + // the total rate is the sum of the nzc rate and the individual token + // rates. The latter part can be optimized in this function, but because + // the nzc rate is a function of all the other tokens without a Markov + // relationship this rate cannot be considered correctly. + // The current implementation uses a suboptimal approach to account for + // the nzc rates somewhat, but in reality the optimization approach needs + // to change substantially.
+ uint16_t nzc = xd->nzcs[ib]; + uint16_t nzc0, nzc1; + uint16_t final_nzc = 0, final_nzc_exp; + int nzc_context = vp9_get_nzc_context(cm, xd, ib); + unsigned int *nzc_cost; + nzc0 = nzc1 = nzc; +#endif switch (tx_size) { default: case TX_4X4: default_eob = 16; - // FIXME(rbultje): although optimize_b currently isn't called for - // intra4x4, this should be changed to be adst-compatible - scan = vp9_default_zig_zag1d_4x4; +#if CONFIG_CODE_NONZEROCOUNT + nzc_cost = mb->nzc_costs_4x4[nzc_context][ref][type]; +#endif + // NOTE: this isn't called (for intra4x4 modes), but will be left in + // since it could be used later + tx_type = get_tx_type_4x4(&mb->e_mbd, &xd->block[ib]); + if (tx_type != DCT_DCT) { + switch (tx_type) { + case ADST_DCT: + scan = vp9_row_scan_4x4; + break; + + case DCT_ADST: + scan = vp9_col_scan_4x4; + break; + + default: + scan = vp9_default_zig_zag1d_4x4; + break; + } + } else { + scan = vp9_default_zig_zag1d_4x4; + } break; case TX_8X8: scan = vp9_default_zig_zag1d_8x8; default_eob = 64; +#if CONFIG_CODE_NONZEROCOUNT + nzc_cost = mb->nzc_costs_8x8[nzc_context][ref][type]; +#endif break; case TX_16X16: scan = vp9_default_zig_zag1d_16x16; default_eob = 256; +#if CONFIG_CODE_NONZEROCOUNT + nzc_cost = mb->nzc_costs_16x16[nzc_context][ref][type]; +#endif break; case TX_32X32: scan = vp9_default_zig_zag1d_32x32; default_eob = 1024; +#if CONFIG_CODE_NONZEROCOUNT + nzc_cost = mb->nzc_costs_32x32[nzc_context][ref][type]; +#endif break; } @@ -542,7 +590,11 @@ static void optimize_b(MACROBLOCK *mb, int ib, PLANE_TYPE type, rddiv = mb->rddiv; memset(best_index, 0, sizeof(best_index)); /* Initialize the sentinel node of the trellis. 
*/ +#if CONFIG_CODE_NONZEROCOUNT + tokens[eob][0].rate = nzc_cost[nzc]; +#else tokens[eob][0].rate = 0; +#endif tokens[eob][0].error = 0; tokens[eob][0].next = default_eob; tokens[eob][0].token = DCT_EOB_TOKEN; @@ -551,6 +603,9 @@ static void optimize_b(MACROBLOCK *mb, int ib, PLANE_TYPE type, next = eob; for (i = eob; i-- > i0;) { int base_bits, d2, dx; +#if CONFIG_CODE_NONZEROCOUNT + int new_nzc0, new_nzc1; +#endif rc = scan[i]; x = qcoeff_ptr[rc]; @@ -584,6 +639,10 @@ static void optimize_b(MACROBLOCK *mb, int ib, PLANE_TYPE type, tokens[i][0].token = t0; tokens[i][0].qc = x; best_index[i][0] = best; +#if CONFIG_CODE_NONZEROCOUNT + new_nzc0 = (best ? nzc1 : nzc0); +#endif + /* Evaluate the second possibility for this state. */ rate0 = tokens[next][0].rate; rate1 = tokens[next][1].rate; @@ -609,6 +668,12 @@ static void optimize_b(MACROBLOCK *mb, int ib, PLANE_TYPE type, DCT_EOB_TOKEN : ZERO_TOKEN; t1 = tokens[next][1].token == DCT_EOB_TOKEN ? DCT_EOB_TOKEN : ZERO_TOKEN; +#if CONFIG_CODE_NONZEROCOUNT + // Account for rate drop because of the nzc change. + // TODO(debargha): Find a better solution + rate0 -= nzc_cost[nzc0] - nzc_cost[nzc0 - 1]; + rate1 -= nzc_cost[nzc1] - nzc_cost[nzc1 - 1]; +#endif } else { t0 = t1 = (vp9_dct_value_tokens_ptr + x)->Token; } @@ -641,6 +706,11 @@ static void optimize_b(MACROBLOCK *mb, int ib, PLANE_TYPE type, tokens[i][1].token = best ? t1 : t0; tokens[i][1].qc = x; best_index[i][1] = best; +#if CONFIG_CODE_NONZEROCOUNT + new_nzc1 = (best ? nzc1 : nzc0) - (!x); + nzc0 = new_nzc0; + nzc1 = new_nzc1; +#endif /* Finally, make this the new head of the trellis. */ next = i; } @@ -679,11 +749,18 @@ static void optimize_b(MACROBLOCK *mb, int ib, PLANE_TYPE type, rate1 += mb->token_costs[tx_size][type][ref][band][pt][t1]; UPDATE_RD_COST(); best = rd_cost1 < rd_cost0; +#if CONFIG_CODE_NONZEROCOUNT + final_nzc_exp = (best ? 
nzc1 : nzc0); +#endif final_eob = i0 - 1; for (i = next; i < eob; i = next) { x = tokens[i][best].qc; - if (x) + if (x) { final_eob = i; +#if CONFIG_CODE_NONZEROCOUNT + ++final_nzc; +#endif + } rc = scan[i]; qcoeff_ptr[rc] = x; dqcoeff_ptr[rc] = (x * dequant_ptr[rc != 0]) / mul; @@ -695,9 +772,13 @@ static void optimize_b(MACROBLOCK *mb, int ib, PLANE_TYPE type, xd->eobs[ib] = final_eob; *a = *l = (final_eob > 0); +#if CONFIG_CODE_NONZEROCOUNT + assert(final_nzc == final_nzc_exp); + xd->nzcs[ib] = final_nzc; +#endif } -void vp9_optimize_mby_4x4(MACROBLOCK *x) { +void vp9_optimize_mby_4x4(VP9_COMMON *const cm, MACROBLOCK *x) { int b; ENTROPY_CONTEXT_PLANES t_above, t_left; ENTROPY_CONTEXT *ta; @@ -713,13 +794,13 @@ void vp9_optimize_mby_4x4(MACROBLOCK *x) { tl = (ENTROPY_CONTEXT *)&t_left; for (b = 0; b < 16; b++) { - optimize_b(x, b, PLANE_TYPE_Y_WITH_DC, x->e_mbd.block[b].dequant, + optimize_b(cm, x, b, PLANE_TYPE_Y_WITH_DC, x->e_mbd.block[b].dequant, ta + vp9_block2above[TX_4X4][b], tl + vp9_block2left[TX_4X4][b], TX_4X4); } } -void vp9_optimize_mbuv_4x4(MACROBLOCK *x) { +void vp9_optimize_mbuv_4x4(VP9_COMMON *const cm, MACROBLOCK *x) { int b; ENTROPY_CONTEXT_PLANES t_above, t_left; ENTROPY_CONTEXT *ta; @@ -735,18 +816,18 @@ void vp9_optimize_mbuv_4x4(MACROBLOCK *x) { tl = (ENTROPY_CONTEXT *)&t_left; for (b = 16; b < 24; b++) { - optimize_b(x, b, PLANE_TYPE_UV, x->e_mbd.block[b].dequant, + optimize_b(cm, x, b, PLANE_TYPE_UV, x->e_mbd.block[b].dequant, ta + vp9_block2above[TX_4X4][b], tl + vp9_block2left[TX_4X4][b], TX_4X4); } } -static void optimize_mb_4x4(MACROBLOCK *x) { - vp9_optimize_mby_4x4(x); - vp9_optimize_mbuv_4x4(x); +static void optimize_mb_4x4(VP9_COMMON *const cm, MACROBLOCK *x) { + vp9_optimize_mby_4x4(cm, x); + vp9_optimize_mbuv_4x4(cm, x); } -void vp9_optimize_mby_8x8(MACROBLOCK *x) { +void vp9_optimize_mby_8x8(VP9_COMMON *const cm, MACROBLOCK *x) { int b; ENTROPY_CONTEXT_PLANES t_above, t_left; ENTROPY_CONTEXT *ta; @@ -765,14 +846,14 @@ void 
vp9_optimize_mby_8x8(MACROBLOCK *x) { ENTROPY_CONTEXT *const l = tl + vp9_block2left[TX_8X8][b]; ENTROPY_CONTEXT above_ec = (a[0] + a[1]) != 0; ENTROPY_CONTEXT left_ec = (l[0] + l[1]) != 0; - optimize_b(x, b, PLANE_TYPE_Y_WITH_DC, x->e_mbd.block[b].dequant, + optimize_b(cm, x, b, PLANE_TYPE_Y_WITH_DC, x->e_mbd.block[b].dequant, &above_ec, &left_ec, TX_8X8); a[1] = a[0] = above_ec; l[1] = l[0] = left_ec; } } -void vp9_optimize_mbuv_8x8(MACROBLOCK *x) { +void vp9_optimize_mbuv_8x8(VP9_COMMON *const cm, MACROBLOCK *x) { int b; ENTROPY_CONTEXT *const ta = (ENTROPY_CONTEXT *)x->e_mbd.above_context; ENTROPY_CONTEXT *const tl = (ENTROPY_CONTEXT *)x->e_mbd.left_context; @@ -785,17 +866,17 @@ void vp9_optimize_mbuv_8x8(MACROBLOCK *x) { ENTROPY_CONTEXT *const l = tl + vp9_block2left[TX_8X8][b]; ENTROPY_CONTEXT above_ec = (a[0] + a[1]) != 0; ENTROPY_CONTEXT left_ec = (l[0] + l[1]) != 0; - optimize_b(x, b, PLANE_TYPE_UV, x->e_mbd.block[b].dequant, + optimize_b(cm, x, b, PLANE_TYPE_UV, x->e_mbd.block[b].dequant, &above_ec, &left_ec, TX_8X8); } } -static void optimize_mb_8x8(MACROBLOCK *x) { - vp9_optimize_mby_8x8(x); - vp9_optimize_mbuv_8x8(x); +static void optimize_mb_8x8(VP9_COMMON *const cm, MACROBLOCK *x) { + vp9_optimize_mby_8x8(cm, x); + vp9_optimize_mbuv_8x8(cm, x); } -void vp9_optimize_mby_16x16(MACROBLOCK *x) { +void vp9_optimize_mby_16x16(VP9_COMMON *const cm, MACROBLOCK *x) { ENTROPY_CONTEXT_PLANES *const t_above = x->e_mbd.above_context; ENTROPY_CONTEXT_PLANES *const t_left = x->e_mbd.left_context; ENTROPY_CONTEXT ta, tl; @@ -805,16 +886,16 @@ void vp9_optimize_mby_16x16(MACROBLOCK *x) { ta = (t_above->y1[0] + t_above->y1[1] + t_above->y1[2] + t_above->y1[3]) != 0; tl = (t_left->y1[0] + t_left->y1[1] + t_left->y1[2] + t_left->y1[3]) != 0; - optimize_b(x, 0, PLANE_TYPE_Y_WITH_DC, x->e_mbd.block[0].dequant, + optimize_b(cm, x, 0, PLANE_TYPE_Y_WITH_DC, x->e_mbd.block[0].dequant, &ta, &tl, TX_16X16); } -static void optimize_mb_16x16(MACROBLOCK *x) { - 
vp9_optimize_mby_16x16(x); - vp9_optimize_mbuv_8x8(x); +static void optimize_mb_16x16(VP9_COMMON *const cm, MACROBLOCK *x) { + vp9_optimize_mby_16x16(cm, x); + vp9_optimize_mbuv_8x8(cm, x); } -void vp9_optimize_sby_32x32(MACROBLOCK *x) { +void vp9_optimize_sby_32x32(VP9_COMMON *const cm, MACROBLOCK *x) { ENTROPY_CONTEXT *a = (ENTROPY_CONTEXT *) x->e_mbd.above_context; ENTROPY_CONTEXT *a1 = (ENTROPY_CONTEXT *) (x->e_mbd.above_context + 1); ENTROPY_CONTEXT *l = (ENTROPY_CONTEXT *) x->e_mbd.left_context; @@ -823,11 +904,11 @@ void vp9_optimize_sby_32x32(MACROBLOCK *x) { ta = (a[0] + a[1] + a[2] + a[3] + a1[0] + a1[1] + a1[2] + a1[3]) != 0; tl = (l[0] + l[1] + l[2] + l[3] + l1[0] + l1[1] + l1[2] + l1[3]) != 0; - optimize_b(x, 0, PLANE_TYPE_Y_WITH_DC, x->e_mbd.block[0].dequant, + optimize_b(cm, x, 0, PLANE_TYPE_Y_WITH_DC, x->e_mbd.block[0].dequant, &ta, &tl, TX_32X32); } -void vp9_optimize_sby_16x16(MACROBLOCK *x) { +void vp9_optimize_sby_16x16(VP9_COMMON *const cm, MACROBLOCK *x) { ENTROPY_CONTEXT *a = (ENTROPY_CONTEXT *) x->e_mbd.above_context; ENTROPY_CONTEXT *a1 = (ENTROPY_CONTEXT *) (x->e_mbd.above_context + 1); ENTROPY_CONTEXT *l = (ENTROPY_CONTEXT *) x->e_mbd.left_context; @@ -842,12 +923,12 @@ void vp9_optimize_sby_16x16(MACROBLOCK *x) { for (n = 0; n < 4; n++) { const int x_idx = n & 1, y_idx = n >> 1; - optimize_b(x, n * 16, PLANE_TYPE_Y_WITH_DC, x->e_mbd.block[0].dequant, + optimize_b(cm, x, n * 16, PLANE_TYPE_Y_WITH_DC, x->e_mbd.block[0].dequant, ta + x_idx, tl + y_idx, TX_16X16); } } -void vp9_optimize_sby_8x8(MACROBLOCK *x) { +void vp9_optimize_sby_8x8(VP9_COMMON *const cm, MACROBLOCK *x) { ENTROPY_CONTEXT *a = (ENTROPY_CONTEXT *) x->e_mbd.above_context; ENTROPY_CONTEXT *a1 = (ENTROPY_CONTEXT *) (x->e_mbd.above_context + 1); ENTROPY_CONTEXT *l = (ENTROPY_CONTEXT *) x->e_mbd.left_context; @@ -866,12 +947,12 @@ void vp9_optimize_sby_8x8(MACROBLOCK *x) { for (n = 0; n < 16; n++) { const int x_idx = n & 3, y_idx = n >> 2; - optimize_b(x, n * 4, 
PLANE_TYPE_Y_WITH_DC, x->e_mbd.block[0].dequant, + optimize_b(cm, x, n * 4, PLANE_TYPE_Y_WITH_DC, x->e_mbd.block[0].dequant, ta + x_idx, tl + y_idx, TX_8X8); } } -void vp9_optimize_sby_4x4(MACROBLOCK *x) { +void vp9_optimize_sby_4x4(VP9_COMMON *const cm, MACROBLOCK *x) { ENTROPY_CONTEXT ta[8], tl[8]; int n; @@ -882,12 +963,12 @@ void vp9_optimize_sby_4x4(MACROBLOCK *x) { for (n = 0; n < 64; n++) { const int x_idx = n & 7, y_idx = n >> 3; - optimize_b(x, n, PLANE_TYPE_Y_WITH_DC, x->e_mbd.block[0].dequant, + optimize_b(cm, x, n, PLANE_TYPE_Y_WITH_DC, x->e_mbd.block[0].dequant, ta + x_idx, tl + y_idx, TX_4X4); } } -void vp9_optimize_sbuv_16x16(MACROBLOCK *x) { +void vp9_optimize_sbuv_16x16(VP9_COMMON *const cm, MACROBLOCK *x) { ENTROPY_CONTEXT *ta = (ENTROPY_CONTEXT *) x->e_mbd.above_context; ENTROPY_CONTEXT *tl = (ENTROPY_CONTEXT *) x->e_mbd.left_context; ENTROPY_CONTEXT *a, *l, *a1, *l1, above_ec, left_ec; @@ -901,12 +982,12 @@ void vp9_optimize_sbuv_16x16(MACROBLOCK *x) { l1 = l + sizeof(ENTROPY_CONTEXT_PLANES) / sizeof(ENTROPY_CONTEXT); above_ec = (a[0] + a[1] + a1[0] + a1[1]) != 0; left_ec = (l[0] + l[1] + l1[0] + l1[1]) != 0; - optimize_b(x, b, PLANE_TYPE_UV, x->e_mbd.block[cidx].dequant, + optimize_b(cm, x, b, PLANE_TYPE_UV, x->e_mbd.block[cidx].dequant, &above_ec, &left_ec, TX_16X16); } } -void vp9_optimize_sbuv_8x8(MACROBLOCK *x) { +void vp9_optimize_sbuv_8x8(VP9_COMMON *const cm, MACROBLOCK *x) { ENTROPY_CONTEXT_PLANES t_above[2], t_left[2]; ENTROPY_CONTEXT *ta = (ENTROPY_CONTEXT *) t_above; ENTROPY_CONTEXT *tl = (ENTROPY_CONTEXT *) t_left; @@ -921,14 +1002,14 @@ void vp9_optimize_sbuv_8x8(MACROBLOCK *x) { l = tl + vp9_block2left_sb[TX_8X8][b]; above_ec = (a[0] + a[1]) != 0; left_ec = (l[0] + l[1]) != 0; - optimize_b(x, b, PLANE_TYPE_UV, x->e_mbd.block[cidx].dequant, + optimize_b(cm, x, b, PLANE_TYPE_UV, x->e_mbd.block[cidx].dequant, &above_ec, &left_ec, TX_8X8); a[0] = a[1] = above_ec; l[0] = l[1] = left_ec; } } -void vp9_optimize_sbuv_4x4(MACROBLOCK *x) { 
+void vp9_optimize_sbuv_4x4(VP9_COMMON *const cm, MACROBLOCK *x) { ENTROPY_CONTEXT_PLANES t_above[2], t_left[2]; ENTROPY_CONTEXT *ta = (ENTROPY_CONTEXT *) t_above; ENTROPY_CONTEXT *tl = (ENTROPY_CONTEXT *) t_left; @@ -941,12 +1022,12 @@ void vp9_optimize_sbuv_4x4(MACROBLOCK *x) { const int cidx = b >= 80 ? 20 : 16; a = ta + vp9_block2above_sb[TX_4X4][b]; l = tl + vp9_block2left_sb[TX_4X4][b]; - optimize_b(x, b, PLANE_TYPE_UV, x->e_mbd.block[cidx].dequant, + optimize_b(cm, x, b, PLANE_TYPE_UV, x->e_mbd.block[cidx].dequant, a, l, TX_4X4); } } -void vp9_optimize_sb64y_32x32(MACROBLOCK *x) { +void vp9_optimize_sb64y_32x32(VP9_COMMON *const cm, MACROBLOCK *x) { ENTROPY_CONTEXT *a = (ENTROPY_CONTEXT *) x->e_mbd.above_context; ENTROPY_CONTEXT *a1 = (ENTROPY_CONTEXT *) (x->e_mbd.above_context + 1); ENTROPY_CONTEXT *a2 = (ENTROPY_CONTEXT *) (x->e_mbd.above_context + 2); @@ -965,12 +1046,12 @@ void vp9_optimize_sb64y_32x32(MACROBLOCK *x) { for (n = 0; n < 4; n++) { const int x_idx = n & 1, y_idx = n >> 1; - optimize_b(x, n * 64, PLANE_TYPE_Y_WITH_DC, x->e_mbd.block[0].dequant, + optimize_b(cm, x, n * 64, PLANE_TYPE_Y_WITH_DC, x->e_mbd.block[0].dequant, ta + x_idx, tl + y_idx, TX_32X32); } } -void vp9_optimize_sb64y_16x16(MACROBLOCK *x) { +void vp9_optimize_sb64y_16x16(VP9_COMMON *const cm, MACROBLOCK *x) { ENTROPY_CONTEXT *a = (ENTROPY_CONTEXT *) x->e_mbd.above_context; ENTROPY_CONTEXT *a1 = (ENTROPY_CONTEXT *) (x->e_mbd.above_context + 1); ENTROPY_CONTEXT *a2 = (ENTROPY_CONTEXT *) (x->e_mbd.above_context + 2); @@ -993,12 +1074,12 @@ void vp9_optimize_sb64y_16x16(MACROBLOCK *x) { for (n = 0; n < 16; n++) { const int x_idx = n & 3, y_idx = n >> 2; - optimize_b(x, n * 16, PLANE_TYPE_Y_WITH_DC, x->e_mbd.block[0].dequant, + optimize_b(cm, x, n * 16, PLANE_TYPE_Y_WITH_DC, x->e_mbd.block[0].dequant, ta + x_idx, tl + y_idx, TX_16X16); } } -void vp9_optimize_sb64y_8x8(MACROBLOCK *x) { +void vp9_optimize_sb64y_8x8(VP9_COMMON *const cm, MACROBLOCK *x) { ENTROPY_CONTEXT *a = 
(ENTROPY_CONTEXT *) x->e_mbd.above_context; ENTROPY_CONTEXT *a1 = (ENTROPY_CONTEXT *) (x->e_mbd.above_context + 1); ENTROPY_CONTEXT *a2 = (ENTROPY_CONTEXT *) (x->e_mbd.above_context + 2); @@ -1029,12 +1110,12 @@ void vp9_optimize_sb64y_8x8(MACROBLOCK *x) { for (n = 0; n < 64; n++) { const int x_idx = n & 7, y_idx = n >> 3; - optimize_b(x, n * 4, PLANE_TYPE_Y_WITH_DC, x->e_mbd.block[0].dequant, + optimize_b(cm, x, n * 4, PLANE_TYPE_Y_WITH_DC, x->e_mbd.block[0].dequant, ta + x_idx, tl + y_idx, TX_8X8); } } -void vp9_optimize_sb64y_4x4(MACROBLOCK *x) { +void vp9_optimize_sb64y_4x4(VP9_COMMON *const cm, MACROBLOCK *x) { ENTROPY_CONTEXT ta[16], tl[16]; int n; @@ -1049,12 +1130,12 @@ void vp9_optimize_sb64y_4x4(MACROBLOCK *x) { for (n = 0; n < 256; n++) { const int x_idx = n & 15, y_idx = n >> 4; - optimize_b(x, n, PLANE_TYPE_Y_WITH_DC, x->e_mbd.block[0].dequant, + optimize_b(cm, x, n, PLANE_TYPE_Y_WITH_DC, x->e_mbd.block[0].dequant, ta + x_idx, tl + y_idx, TX_4X4); } } -void vp9_optimize_sb64uv_32x32(MACROBLOCK *x) { +void vp9_optimize_sb64uv_32x32(VP9_COMMON *const cm, MACROBLOCK *x) { ENTROPY_CONTEXT *ta = (ENTROPY_CONTEXT *) x->e_mbd.above_context; ENTROPY_CONTEXT *tl = (ENTROPY_CONTEXT *) x->e_mbd.left_context; ENTROPY_CONTEXT *a, *l, *a1, *l1, *a2, *l2, *a3, *l3, a_ec, l_ec; @@ -1072,12 +1153,12 @@ void vp9_optimize_sb64uv_32x32(MACROBLOCK *x) { l3 = l + 3 * sizeof(ENTROPY_CONTEXT_PLANES) / sizeof(ENTROPY_CONTEXT); a_ec = (a[0] + a[1] + a1[0] + a1[1] + a2[0] + a2[1] + a3[0] + a3[1]) != 0; l_ec = (l[0] + l[1] + l1[0] + l1[1] + l2[0] + l2[1] + l3[0] + l3[1]) != 0; - optimize_b(x, b, PLANE_TYPE_UV, x->e_mbd.block[cidx].dequant, + optimize_b(cm, x, b, PLANE_TYPE_UV, x->e_mbd.block[cidx].dequant, &a_ec, &l_ec, TX_32X32); } } -void vp9_optimize_sb64uv_16x16(MACROBLOCK *x) { +void vp9_optimize_sb64uv_16x16(VP9_COMMON *const cm, MACROBLOCK *x) { ENTROPY_CONTEXT_PLANES t_above[4], t_left[4]; ENTROPY_CONTEXT *ta = (ENTROPY_CONTEXT *) t_above; ENTROPY_CONTEXT *tl = 
(ENTROPY_CONTEXT *) t_left; @@ -1094,14 +1175,14 @@ void vp9_optimize_sb64uv_16x16(MACROBLOCK *x) { l1 = l + sizeof(ENTROPY_CONTEXT_PLANES) / sizeof(ENTROPY_CONTEXT); above_ec = (a[0] + a[1] + a1[0] + a1[1]) != 0; left_ec = (l[0] + l[1] + l1[0] + l1[1]) != 0; - optimize_b(x, b, PLANE_TYPE_UV, x->e_mbd.block[cidx].dequant, + optimize_b(cm, x, b, PLANE_TYPE_UV, x->e_mbd.block[cidx].dequant, &above_ec, &left_ec, TX_16X16); a[0] = a[1] = a1[0] = a1[1] = above_ec; l[0] = l[1] = l1[0] = l1[1] = left_ec; } } -void vp9_optimize_sb64uv_8x8(MACROBLOCK *x) { +void vp9_optimize_sb64uv_8x8(VP9_COMMON *const cm, MACROBLOCK *x) { ENTROPY_CONTEXT_PLANES t_above[4], t_left[4]; ENTROPY_CONTEXT *ta = (ENTROPY_CONTEXT *) t_above; ENTROPY_CONTEXT *tl = (ENTROPY_CONTEXT *) t_left; @@ -1116,14 +1197,14 @@ void vp9_optimize_sb64uv_8x8(MACROBLOCK *x) { l = tl + vp9_block2left_sb64[TX_8X8][b]; above_ec = (a[0] + a[1]) != 0; left_ec = (l[0] + l[1]) != 0; - optimize_b(x, b, PLANE_TYPE_UV, x->e_mbd.block[cidx].dequant, + optimize_b(cm, x, b, PLANE_TYPE_UV, x->e_mbd.block[cidx].dequant, &above_ec, &left_ec, TX_8X8); a[0] = a[1] = above_ec; l[0] = l[1] = left_ec; } } -void vp9_optimize_sb64uv_4x4(MACROBLOCK *x) { +void vp9_optimize_sb64uv_4x4(VP9_COMMON *const cm, MACROBLOCK *x) { ENTROPY_CONTEXT_PLANES t_above[4], t_left[4]; ENTROPY_CONTEXT *ta = (ENTROPY_CONTEXT *) t_above; ENTROPY_CONTEXT *tl = (ENTROPY_CONTEXT *) t_left; @@ -1136,12 +1217,12 @@ void vp9_optimize_sb64uv_4x4(MACROBLOCK *x) { const int cidx = b >= 320 ? 
20 : 16; a = ta + vp9_block2above_sb64[TX_4X4][b]; l = tl + vp9_block2left_sb64[TX_4X4][b]; - optimize_b(x, b, PLANE_TYPE_UV, x->e_mbd.block[cidx].dequant, + optimize_b(cm, x, b, PLANE_TYPE_UV, x->e_mbd.block[cidx].dequant, a, l, TX_4X4); } } -void vp9_fidct_mb(MACROBLOCK *x) { +void vp9_fidct_mb(VP9_COMMON *const cm, MACROBLOCK *x) { MACROBLOCKD *const xd = &x->e_mbd; TX_SIZE tx_size = xd->mode_info_context->mbmi.txfm_size; @@ -1149,7 +1230,7 @@ void vp9_fidct_mb(MACROBLOCK *x) { vp9_transform_mb_16x16(x); vp9_quantize_mb_16x16(x); if (x->optimize) - optimize_mb_16x16(x); + optimize_mb_16x16(cm, x); vp9_inverse_transform_mb_16x16(xd); } else if (tx_size == TX_8X8) { if (xd->mode_info_context->mbmi.mode == SPLITMV) { @@ -1159,8 +1240,8 @@ void vp9_fidct_mb(MACROBLOCK *x) { vp9_quantize_mby_8x8(x); vp9_quantize_mbuv_4x4(x); if (x->optimize) { - vp9_optimize_mby_8x8(x); - vp9_optimize_mbuv_4x4(x); + vp9_optimize_mby_8x8(cm, x); + vp9_optimize_mbuv_4x4(cm, x); } vp9_inverse_transform_mby_8x8(xd); vp9_inverse_transform_mbuv_4x4(xd); @@ -1168,24 +1249,25 @@ void vp9_fidct_mb(MACROBLOCK *x) { vp9_transform_mb_8x8(x); vp9_quantize_mb_8x8(x); if (x->optimize) - optimize_mb_8x8(x); + optimize_mb_8x8(cm, x); vp9_inverse_transform_mb_8x8(xd); } } else { transform_mb_4x4(x); vp9_quantize_mb_4x4(x); if (x->optimize) - optimize_mb_4x4(x); + optimize_mb_4x4(cm, x); vp9_inverse_transform_mb_4x4(xd); } } -void vp9_encode_inter16x16(MACROBLOCK *x, int mb_row, int mb_col) { +void vp9_encode_inter16x16(VP9_COMMON *const cm, MACROBLOCK *x, + int mb_row, int mb_col) { MACROBLOCKD *const xd = &x->e_mbd; vp9_build_inter_predictors_mb(xd, mb_row, mb_col); subtract_mb(x); - vp9_fidct_mb(x); + vp9_fidct_mb(cm, x); vp9_recon_mb(xd); } diff --git a/vp9/encoder/vp9_encodemb.h b/vp9/encoder/vp9_encodemb.h index 8164bbac2..242afbeae 100644 --- a/vp9/encoder/vp9_encodemb.h +++ b/vp9/encoder/vp9_encodemb.h @@ -14,6 +14,7 @@ #include "./vpx_config.h" #include "vp9/encoder/vp9_block.h" #include 
"vp9/encoder/vp9_onyx_int.h" +#include "vp9/common/vp9_onyxc_int.h" typedef struct { MB_PREDICTION_MODE mode; @@ -23,58 +24,59 @@ typedef struct { struct VP9_ENCODER_RTCD; -void vp9_encode_inter16x16(MACROBLOCK *x, int mb_row, int mb_col); +void vp9_encode_inter16x16(VP9_COMMON *const cm, MACROBLOCK *x, + int mb_row, int mb_col); void vp9_transform_mbuv_4x4(MACROBLOCK *x); void vp9_transform_mby_4x4(MACROBLOCK *x); -void vp9_optimize_mby_4x4(MACROBLOCK *x); -void vp9_optimize_mbuv_4x4(MACROBLOCK *x); +void vp9_optimize_mby_4x4(VP9_COMMON *const cm, MACROBLOCK *x); +void vp9_optimize_mbuv_4x4(VP9_COMMON *const cm, MACROBLOCK *x); void vp9_encode_inter16x16y(MACROBLOCK *x, int mb_row, int mb_col); void vp9_transform_mb_8x8(MACROBLOCK *mb); void vp9_transform_mby_8x8(MACROBLOCK *x); void vp9_transform_mbuv_8x8(MACROBLOCK *x); -void vp9_optimize_mby_8x8(MACROBLOCK *x); -void vp9_optimize_mbuv_8x8(MACROBLOCK *x); +void vp9_optimize_mby_8x8(VP9_COMMON *const cm, MACROBLOCK *x); +void vp9_optimize_mbuv_8x8(VP9_COMMON *const cm, MACROBLOCK *x); void vp9_transform_mb_16x16(MACROBLOCK *mb); void vp9_transform_mby_16x16(MACROBLOCK *x); -void vp9_optimize_mby_16x16(MACROBLOCK *x); +void vp9_optimize_mby_16x16(VP9_COMMON *const cm, MACROBLOCK *x); void vp9_transform_sby_32x32(MACROBLOCK *x); -void vp9_optimize_sby_32x32(MACROBLOCK *x); +void vp9_optimize_sby_32x32(VP9_COMMON *const cm, MACROBLOCK *x); void vp9_transform_sby_16x16(MACROBLOCK *x); -void vp9_optimize_sby_16x16(MACROBLOCK *x); +void vp9_optimize_sby_16x16(VP9_COMMON *const cm, MACROBLOCK *x); void vp9_transform_sby_8x8(MACROBLOCK *x); -void vp9_optimize_sby_8x8(MACROBLOCK *x); +void vp9_optimize_sby_8x8(VP9_COMMON *const cm, MACROBLOCK *x); void vp9_transform_sby_4x4(MACROBLOCK *x); -void vp9_optimize_sby_4x4(MACROBLOCK *x); +void vp9_optimize_sby_4x4(VP9_COMMON *const cm, MACROBLOCK *x); void vp9_transform_sbuv_16x16(MACROBLOCK *x); -void vp9_optimize_sbuv_16x16(MACROBLOCK *x); +void 
vp9_optimize_sbuv_16x16(VP9_COMMON *const cm, MACROBLOCK *x); void vp9_transform_sbuv_8x8(MACROBLOCK *x); -void vp9_optimize_sbuv_8x8(MACROBLOCK *x); +void vp9_optimize_sbuv_8x8(VP9_COMMON *const cm, MACROBLOCK *x); void vp9_transform_sbuv_4x4(MACROBLOCK *x); -void vp9_optimize_sbuv_4x4(MACROBLOCK *x); +void vp9_optimize_sbuv_4x4(VP9_COMMON *const cm, MACROBLOCK *x); void vp9_transform_sb64y_32x32(MACROBLOCK *x); -void vp9_optimize_sb64y_32x32(MACROBLOCK *x); +void vp9_optimize_sb64y_32x32(VP9_COMMON *const cm, MACROBLOCK *x); void vp9_transform_sb64y_16x16(MACROBLOCK *x); -void vp9_optimize_sb64y_16x16(MACROBLOCK *x); +void vp9_optimize_sb64y_16x16(VP9_COMMON *const cm, MACROBLOCK *x); void vp9_transform_sb64y_8x8(MACROBLOCK *x); -void vp9_optimize_sb64y_8x8(MACROBLOCK *x); +void vp9_optimize_sb64y_8x8(VP9_COMMON *const cm, MACROBLOCK *x); void vp9_transform_sb64y_4x4(MACROBLOCK *x); -void vp9_optimize_sb64y_4x4(MACROBLOCK *x); +void vp9_optimize_sb64y_4x4(VP9_COMMON *const cm, MACROBLOCK *x); void vp9_transform_sb64uv_32x32(MACROBLOCK *x); -void vp9_optimize_sb64uv_32x32(MACROBLOCK *x); +void vp9_optimize_sb64uv_32x32(VP9_COMMON *const cm, MACROBLOCK *x); void vp9_transform_sb64uv_16x16(MACROBLOCK *x); -void vp9_optimize_sb64uv_16x16(MACROBLOCK *x); +void vp9_optimize_sb64uv_16x16(VP9_COMMON *const cm, MACROBLOCK *x); void vp9_transform_sb64uv_8x8(MACROBLOCK *x); -void vp9_optimize_sb64uv_8x8(MACROBLOCK *x); +void vp9_optimize_sb64uv_8x8(VP9_COMMON *const cm, MACROBLOCK *x); void vp9_transform_sb64uv_4x4(MACROBLOCK *x); -void vp9_optimize_sb64uv_4x4(MACROBLOCK *x); +void vp9_optimize_sb64uv_4x4(VP9_COMMON *const cm, MACROBLOCK *x); -void vp9_fidct_mb(MACROBLOCK *x); +void vp9_fidct_mb(VP9_COMMON *const cm, MACROBLOCK *x); void vp9_subtract_4b_c(BLOCK *be, BLOCKD *bd, int pitch); diff --git a/vp9/encoder/vp9_onyx_if.c b/vp9/encoder/vp9_onyx_if.c index 23e4f8acf..31f847399 100644 --- a/vp9/encoder/vp9_onyx_if.c +++ b/vp9/encoder/vp9_onyx_if.c @@ -1697,6 +1697,12 @@ 
VP9_PTR vp9_create_compressor(VP9_CONFIG *oxcf) { cpi->common.error.setjmp = 0; vp9_zero(cpi->y_uv_mode_count) +#if CONFIG_CODE_NONZEROCOUNT + vp9_zero(cm->fc.nzc_counts_4x4); + vp9_zero(cm->fc.nzc_counts_8x8); + vp9_zero(cm->fc.nzc_counts_16x16); + vp9_zero(cm->fc.nzc_counts_32x32); +#endif return (VP9_PTR) cpi; } @@ -3340,8 +3346,12 @@ static void encode_frame_to_data_rate(VP9_COMP *cpi, vp9_copy(cpi->common.fc.coef_counts_16x16, cpi->coef_counts_16x16); vp9_copy(cpi->common.fc.coef_counts_32x32, cpi->coef_counts_32x32); if (!cpi->common.error_resilient_mode && - !cpi->common.frame_parallel_decoding_mode) + !cpi->common.frame_parallel_decoding_mode) { vp9_adapt_coef_probs(&cpi->common); +#if CONFIG_CODE_NONZEROCOUNT + vp9_adapt_nzc_probs(&cpi->common); +#endif + } if (cpi->common.frame_type != KEY_FRAME) { vp9_copy(cpi->common.fc.sb_ymode_counts, cpi->sb_ymode_count); vp9_copy(cpi->common.fc.ymode_counts, cpi->ymode_count); diff --git a/vp9/encoder/vp9_onyx_int.h b/vp9/encoder/vp9_onyx_int.h index 75331755b..13d043a14 100644 --- a/vp9/encoder/vp9_onyx_int.h +++ b/vp9/encoder/vp9_onyx_int.h @@ -112,6 +112,16 @@ typedef struct { int mv_ref_ct[INTER_MODE_CONTEXTS][4][2]; int vp9_mode_contexts[INTER_MODE_CONTEXTS][4]; +#if CONFIG_CODE_NONZEROCOUNT + vp9_prob nzc_probs_4x4 + [MAX_NZC_CONTEXTS][REF_TYPES][BLOCK_TYPES][NZC4X4_NODES]; + vp9_prob nzc_probs_8x8 + [MAX_NZC_CONTEXTS][REF_TYPES][BLOCK_TYPES][NZC8X8_NODES]; + vp9_prob nzc_probs_16x16 + [MAX_NZC_CONTEXTS][REF_TYPES][BLOCK_TYPES][NZC16X16_NODES]; + vp9_prob nzc_probs_32x32 + [MAX_NZC_CONTEXTS][REF_TYPES][BLOCK_TYPES][NZC32X32_NODES]; +#endif } CODING_CONTEXT; typedef struct { @@ -481,6 +491,25 @@ typedef struct VP9_COMP { vp9_coeff_probs frame_coef_probs_32x32[BLOCK_TYPES]; vp9_coeff_stats frame_branch_ct_32x32[BLOCK_TYPES]; +#if CONFIG_CODE_NONZEROCOUNT + vp9_prob frame_nzc_probs_4x4 + [MAX_NZC_CONTEXTS][REF_TYPES][BLOCK_TYPES][NZC4X4_NODES]; + unsigned int frame_nzc_branch_ct_4x4 + 
[MAX_NZC_CONTEXTS][REF_TYPES][BLOCK_TYPES][NZC4X4_NODES][2]; + vp9_prob frame_nzc_probs_8x8 + [MAX_NZC_CONTEXTS][REF_TYPES][BLOCK_TYPES][NZC8X8_NODES]; + unsigned int frame_nzc_branch_ct_8x8 + [MAX_NZC_CONTEXTS][REF_TYPES][BLOCK_TYPES][NZC8X8_NODES][2]; + vp9_prob frame_nzc_probs_16x16 + [MAX_NZC_CONTEXTS][REF_TYPES][BLOCK_TYPES][NZC16X16_NODES]; + unsigned int frame_nzc_branch_ct_16x16 + [MAX_NZC_CONTEXTS][REF_TYPES][BLOCK_TYPES][NZC16X16_NODES][2]; + vp9_prob frame_nzc_probs_32x32 + [MAX_NZC_CONTEXTS][REF_TYPES][BLOCK_TYPES][NZC32X32_NODES]; + unsigned int frame_nzc_branch_ct_32x32 + [MAX_NZC_CONTEXTS][REF_TYPES][BLOCK_TYPES][NZC32X32_NODES][2]; +#endif + int gfu_boost; int last_boost; int kf_boost; diff --git a/vp9/encoder/vp9_quantize.c b/vp9/encoder/vp9_quantize.c index 75f22fac0..66ee24840 100644 --- a/vp9/encoder/vp9_quantize.c +++ b/vp9/encoder/vp9_quantize.c @@ -40,6 +40,9 @@ void vp9_ht_quantize_b_4x4(MACROBLOCK *mb, int b_idx, TX_TYPE tx_type) { int zbin_oq_value = b->zbin_extra; int const *pt_scan ; +#if CONFIG_CODE_NONZEROCOUNT + int nzc = 0; +#endif switch (tx_type) { case ADST_DCT: @@ -81,6 +84,9 @@ void vp9_ht_quantize_b_4x4(MACROBLOCK *mb, int b_idx, TX_TYPE tx_type) { if (y) { eob = i; // last nonzero coeffs +#if CONFIG_CODE_NONZEROCOUNT + ++nzc; // number of nonzero coeffs +#endif zbin_boost_ptr = b->zrun_zbin_boost; // reset zero runlength } } @@ -88,6 +94,9 @@ void vp9_ht_quantize_b_4x4(MACROBLOCK *mb, int b_idx, TX_TYPE tx_type) { } xd->eobs[b_idx] = eob + 1; +#if CONFIG_CODE_NONZEROCOUNT + xd->nzcs[b_idx] = nzc; +#endif } void vp9_regular_quantize_b_4x4(MACROBLOCK *mb, int b_idx) { @@ -107,6 +116,9 @@ void vp9_regular_quantize_b_4x4(MACROBLOCK *mb, int b_idx) { int16_t *dqcoeff_ptr = d->dqcoeff; int16_t *dequant_ptr = d->dequant; int zbin_oq_value = b->zbin_extra; +#if CONFIG_CODE_NONZEROCOUNT + int nzc = 0; +#endif vpx_memset(qcoeff_ptr, 0, 32); vpx_memset(dqcoeff_ptr, 0, 32); @@ -135,6 +147,9 @@ void vp9_regular_quantize_b_4x4(MACROBLOCK 
*mb, int b_idx) { if (y) { eob = i; // last nonzero coeffs +#if CONFIG_CODE_NONZEROCOUNT + ++nzc; // number of nonzero coeffs +#endif zbin_boost_ptr = b->zrun_zbin_boost; // reset zero runlength } } @@ -142,6 +157,9 @@ void vp9_regular_quantize_b_4x4(MACROBLOCK *mb, int b_idx) { } xd->eobs[b_idx] = eob + 1; +#if CONFIG_CODE_NONZEROCOUNT + xd->nzcs[b_idx] = nzc; +#endif } void vp9_quantize_mby_4x4_c(MACROBLOCK *x) { @@ -192,6 +210,9 @@ void vp9_regular_quantize_b_8x8(MACROBLOCK *mb, int b_idx) { uint8_t *quant_shift_ptr = b->quant_shift; int16_t *dequant_ptr = d->dequant; int zbin_oq_value = b->zbin_extra; +#if CONFIG_CODE_NONZEROCOUNT + int nzc = 0; +#endif eob = -1; @@ -215,6 +236,9 @@ void vp9_regular_quantize_b_8x8(MACROBLOCK *mb, int b_idx) { if (y) { eob = 0; // last nonzero coeffs +#if CONFIG_CODE_NONZEROCOUNT + ++nzc; // number of nonzero coeffs +#endif zero_run = 0; } } @@ -241,19 +265,33 @@ void vp9_regular_quantize_b_8x8(MACROBLOCK *mb, int b_idx) { if (y) { eob = i; // last nonzero coeffs +#if CONFIG_CODE_NONZEROCOUNT + ++nzc; // number of nonzero coeffs +#endif zero_run = 0; } } } xd->eobs[b_idx] = eob + 1; +#if CONFIG_CODE_NONZEROCOUNT + xd->nzcs[b_idx] = nzc; +#endif } else { xd->eobs[b_idx] = 0; +#if CONFIG_CODE_NONZEROCOUNT + xd->nzcs[b_idx] = 0; +#endif } } void vp9_quantize_mby_8x8(MACROBLOCK *x) { int i; +#if CONFIG_CODE_NONZEROCOUNT + for (i = 0; i < 16; i ++) { + x->e_mbd.nzcs[i] = 0; + } +#endif for (i = 0; i < 16; i += 4) { x->quantize_b_8x8(x, i); } @@ -262,6 +300,11 @@ void vp9_quantize_mby_8x8(MACROBLOCK *x) { void vp9_quantize_mbuv_8x8(MACROBLOCK *x) { int i; +#if CONFIG_CODE_NONZEROCOUNT + for (i = 16; i < 24; i ++) { + x->e_mbd.nzcs[i] = 0; + } +#endif for (i = 16; i < 24; i += 4) x->quantize_b_8x8(x, i); } @@ -272,6 +315,12 @@ void vp9_quantize_mb_8x8(MACROBLOCK *x) { } void vp9_quantize_mby_16x16(MACROBLOCK *x) { +#if CONFIG_CODE_NONZEROCOUNT + int i; + for (i = 0; i < 16; i++) { + x->e_mbd.nzcs[i] = 0; + } +#endif 
x->quantize_b_16x16(x, 0); } @@ -286,12 +335,19 @@ static void quantize(int16_t *zbin_boost_orig_ptr, uint8_t *quant_shift_ptr, int16_t *qcoeff_ptr, int16_t *dqcoeff_ptr, int16_t *dequant_ptr, int zbin_oq_value, - uint16_t *eob_ptr, const int *scan, int mul) { + uint16_t *eob_ptr, +#if CONFIG_CODE_NONZEROCOUNT + uint16_t *nzc_ptr, +#endif + const int *scan, int mul) { int i, rc, eob; int zbin; int x, y, z, sz; int zero_run = 0; int16_t *zbin_boost_ptr = zbin_boost_orig_ptr; +#if CONFIG_CODE_NONZEROCOUNT + int nzc = 0; +#endif vpx_memset(qcoeff_ptr, 0, n_coeffs*sizeof(int16_t)); vpx_memset(dqcoeff_ptr, 0, n_coeffs*sizeof(int16_t)); @@ -320,12 +376,18 @@ static void quantize(int16_t *zbin_boost_orig_ptr, if (y) { eob = i; // last nonzero coeffs zero_run = 0; +#if CONFIG_CODE_NONZEROCOUNT + ++nzc; // number of nonzero coeffs +#endif } } } } *eob_ptr = eob + 1; +#if CONFIG_CODE_NONZEROCOUNT + *nzc_ptr = nzc; +#endif } void vp9_regular_quantize_b_16x16(MACROBLOCK *mb, int b_idx) { @@ -340,7 +402,11 @@ void vp9_regular_quantize_b_16x16(MACROBLOCK *mb, int b_idx) { d->dqcoeff, d->dequant, b->zbin_extra, - &xd->eobs[b_idx], vp9_default_zig_zag1d_16x16, 1); + &xd->eobs[b_idx], +#if CONFIG_CODE_NONZEROCOUNT + &xd->nzcs[b_idx], +#endif + vp9_default_zig_zag1d_16x16, 1); } void vp9_quantize_sby_32x32(MACROBLOCK *x) { @@ -358,6 +424,9 @@ void vp9_quantize_sby_32x32(MACROBLOCK *x) { d->dequant, b->zbin_extra, &xd->eobs[0], +#if CONFIG_CODE_NONZEROCOUNT + &xd->nzcs[0], +#endif vp9_default_zig_zag1d_32x32, 2); } @@ -378,6 +447,9 @@ void vp9_quantize_sby_16x16(MACROBLOCK *x) { d->dequant, b->zbin_extra, &xd->eobs[n * 16], +#if CONFIG_CODE_NONZEROCOUNT + &xd->nzcs[n * 16], +#endif vp9_default_zig_zag1d_16x16, 1); } @@ -398,6 +470,9 @@ void vp9_quantize_sby_8x8(MACROBLOCK *x) { d->dequant, b->zbin_extra, &xd->eobs[n * 4], +#if CONFIG_CODE_NONZEROCOUNT + &xd->nzcs[n * 4], +#endif vp9_default_zig_zag1d_8x8, 1); } @@ -418,6 +493,9 @@ void vp9_quantize_sby_4x4(MACROBLOCK *x) { 
d->dequant, b->zbin_extra, &xd->eobs[n], +#if CONFIG_CODE_NONZEROCOUNT + &xd->nzcs[n], +#endif vp9_default_zig_zag1d_4x4, 1); } @@ -437,6 +515,9 @@ void vp9_quantize_sbuv_16x16(MACROBLOCK *x) { xd->block[cidx].dequant, x->block[cidx].zbin_extra, &xd->eobs[i], +#if CONFIG_CODE_NONZEROCOUNT + &xd->nzcs[i], +#endif vp9_default_zig_zag1d_16x16, 1); } } @@ -457,6 +538,9 @@ void vp9_quantize_sbuv_8x8(MACROBLOCK *x) { xd->block[cidx].dequant, x->block[cidx].zbin_extra, &xd->eobs[i], +#if CONFIG_CODE_NONZEROCOUNT + &xd->nzcs[i], +#endif vp9_default_zig_zag1d_8x8, 1); } } @@ -477,6 +561,9 @@ void vp9_quantize_sbuv_4x4(MACROBLOCK *x) { xd->block[cidx].dequant, x->block[cidx].zbin_extra, &xd->eobs[i], +#if CONFIG_CODE_NONZEROCOUNT + &xd->nzcs[i], +#endif vp9_default_zig_zag1d_4x4, 1); } } @@ -498,6 +585,9 @@ void vp9_quantize_sb64y_32x32(MACROBLOCK *x) { d->dequant, b->zbin_extra, &xd->eobs[n * 64], +#if CONFIG_CODE_NONZEROCOUNT + &xd->nzcs[n * 64], +#endif vp9_default_zig_zag1d_32x32, 2); } @@ -518,6 +608,9 @@ void vp9_quantize_sb64y_16x16(MACROBLOCK *x) { d->dequant, b->zbin_extra, &xd->eobs[n * 16], +#if CONFIG_CODE_NONZEROCOUNT + &xd->nzcs[n * 16], +#endif vp9_default_zig_zag1d_16x16, 1); } @@ -538,6 +631,9 @@ void vp9_quantize_sb64y_8x8(MACROBLOCK *x) { d->dequant, b->zbin_extra, &xd->eobs[n * 4], +#if CONFIG_CODE_NONZEROCOUNT + &xd->nzcs[n * 4], +#endif vp9_default_zig_zag1d_8x8, 1); } @@ -558,6 +654,9 @@ void vp9_quantize_sb64y_4x4(MACROBLOCK *x) { d->dequant, b->zbin_extra, &xd->eobs[n], +#if CONFIG_CODE_NONZEROCOUNT + &xd->nzcs[n], +#endif vp9_default_zig_zag1d_4x4, 1); } @@ -577,6 +676,9 @@ void vp9_quantize_sb64uv_32x32(MACROBLOCK *x) { xd->block[cidx].dequant, x->block[cidx].zbin_extra, &xd->eobs[i], +#if CONFIG_CODE_NONZEROCOUNT + &xd->nzcs[i], +#endif vp9_default_zig_zag1d_32x32, 2); } } @@ -597,6 +699,9 @@ void vp9_quantize_sb64uv_16x16(MACROBLOCK *x) { xd->block[cidx].dequant, x->block[cidx].zbin_extra, &xd->eobs[i], +#if CONFIG_CODE_NONZEROCOUNT + 
&xd->nzcs[i], +#endif vp9_default_zig_zag1d_16x16, 1); } } @@ -617,6 +722,9 @@ void vp9_quantize_sb64uv_8x8(MACROBLOCK *x) { xd->block[cidx].dequant, x->block[cidx].zbin_extra, &xd->eobs[i], +#if CONFIG_CODE_NONZEROCOUNT + &xd->nzcs[i], +#endif vp9_default_zig_zag1d_8x8, 1); } } @@ -637,6 +745,9 @@ void vp9_quantize_sb64uv_4x4(MACROBLOCK *x) { xd->block[cidx].dequant, x->block[cidx].zbin_extra, &xd->eobs[i], +#if CONFIG_CODE_NONZEROCOUNT + &xd->nzcs[i], +#endif vp9_default_zig_zag1d_4x4, 1); } } diff --git a/vp9/encoder/vp9_ratectrl.c b/vp9/encoder/vp9_ratectrl.c index d679aaf7a..a7415af12 100644 --- a/vp9/encoder/vp9_ratectrl.c +++ b/vp9/encoder/vp9_ratectrl.c @@ -185,6 +185,12 @@ void vp9_save_coding_context(VP9_COMP *cpi) { #if CONFIG_COMP_INTERINTRA_PRED cc->interintra_prob = cm->fc.interintra_prob; #endif +#if CONFIG_CODE_NONZEROCOUNT + vp9_copy(cc->nzc_probs_4x4, cm->fc.nzc_probs_4x4); + vp9_copy(cc->nzc_probs_8x8, cm->fc.nzc_probs_8x8); + vp9_copy(cc->nzc_probs_16x16, cm->fc.nzc_probs_16x16); + vp9_copy(cc->nzc_probs_32x32, cm->fc.nzc_probs_32x32); +#endif } void vp9_restore_coding_context(VP9_COMP *cpi) { @@ -240,6 +246,12 @@ void vp9_restore_coding_context(VP9_COMP *cpi) { #if CONFIG_COMP_INTERINTRA_PRED cm->fc.interintra_prob = cc->interintra_prob; #endif +#if CONFIG_CODE_NONZEROCOUNT + vp9_copy(cm->fc.nzc_probs_4x4, cc->nzc_probs_4x4); + vp9_copy(cm->fc.nzc_probs_8x8, cc->nzc_probs_8x8); + vp9_copy(cm->fc.nzc_probs_16x16, cc->nzc_probs_16x16); + vp9_copy(cm->fc.nzc_probs_32x32, cc->nzc_probs_32x32); +#endif } void vp9_setup_key_frame(VP9_COMP *cpi) { diff --git a/vp9/encoder/vp9_rdopt.c b/vp9/encoder/vp9_rdopt.c index 61379b84d..1b83091b3 100644 --- a/vp9/encoder/vp9_rdopt.c +++ b/vp9/encoder/vp9_rdopt.c @@ -156,6 +156,12 @@ static void fill_token_costs(vp9_coeff_count *c, for (j = 0; j < REF_TYPES; j++) for (k = 0; k < COEF_BANDS; k++) for (l = 0; l < PREV_COEF_CONTEXTS; l++) { +#if CONFIG_CODE_NONZEROCOUNT + // All costs are without the EOB node + 
vp9_cost_tokens_skip((int *)(c[i][j][k][l]), + p[i][j][k][l], + vp9_coef_tree); +#else if (l == 0 && k > 0) vp9_cost_tokens_skip((int *)(c[i][j][k][l]), p[i][j][k][l], @@ -164,9 +170,64 @@ static void fill_token_costs(vp9_coeff_count *c, vp9_cost_tokens((int *)(c[i][j][k][l]), p[i][j][k][l], vp9_coef_tree); +#endif } } +#if CONFIG_CODE_NONZEROCOUNT +static void fill_nzc_costs(VP9_COMP *cpi, int block_size) { + int nzc_context, r, b, nzc, values; + int cost[16]; + values = block_size * block_size + 1; + + for (nzc_context = 0; nzc_context < MAX_NZC_CONTEXTS; ++nzc_context) { + for (r = 0; r < REF_TYPES; ++r) { + for (b = 0; b < BLOCK_TYPES; ++b) { + if (block_size == 4) + vp9_cost_tokens(cost, + cpi->common.fc.nzc_probs_4x4[nzc_context][r][b], + vp9_nzc4x4_tree); + else if (block_size == 8) + vp9_cost_tokens(cost, + cpi->common.fc.nzc_probs_8x8[nzc_context][r][b], + vp9_nzc8x8_tree); + else if (block_size == 16) + vp9_cost_tokens(cost, + cpi->common.fc.nzc_probs_16x16[nzc_context][r][b], + vp9_nzc16x16_tree); + else + vp9_cost_tokens(cost, + cpi->common.fc.nzc_probs_32x32[nzc_context][r][b], + vp9_nzc32x32_tree); + + for (nzc = 0; nzc < values; ++nzc) { + int e, c, totalcost = 0; + c = codenzc(nzc); + totalcost = cost[c]; + if ((e = extranzcbits(c))) { + int x = nzc - basenzcvalue(c); + while (e--) { + if ((x >> e) & 1) + totalcost += vp9_cost_one(Pcat_nzc[nzc_context][c - 3][e]); + else + totalcost += vp9_cost_zero(Pcat_nzc[nzc_context][c - 3][e]); + } + } + if (block_size == 4) + cpi->mb.nzc_costs_4x4[nzc_context][r][b][nzc] = totalcost; + else if (block_size == 8) + cpi->mb.nzc_costs_8x8[nzc_context][r][b][nzc] = totalcost; + else if (block_size == 16) + cpi->mb.nzc_costs_16x16[nzc_context][r][b][nzc] = totalcost; + else + cpi->mb.nzc_costs_32x32[nzc_context][r][b][nzc] = totalcost; + } + } + } + } +} +#endif + static int rd_iifactor[32] = { 4, 4, 3, 2, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, @@ -274,6 +335,12 @@ void vp9_initialize_rd_consts(VP9_COMP *cpi, int 
qindex) { cpi->common.fc.coef_probs_16x16, BLOCK_TYPES); fill_token_costs(cpi->mb.token_costs[TX_32X32], cpi->common.fc.coef_probs_32x32, BLOCK_TYPES); +#if CONFIG_CODE_NONZEROCOUNT + fill_nzc_costs(cpi, 4); + fill_nzc_costs(cpi, 8); + fill_nzc_costs(cpi, 16); + fill_nzc_costs(cpi, 32); +#endif /*rough estimate for costing*/ cpi->common.kf_ymode_probs_index = cpi->common.base_qindex >> 4; @@ -379,7 +446,7 @@ int vp9_uvsse(MACROBLOCK *x) { return sse2; } -static INLINE int cost_coeffs(MACROBLOCK *mb, +static INLINE int cost_coeffs(VP9_COMMON *const cm, MACROBLOCK *mb, int ib, PLANE_TYPE type, ENTROPY_CONTEXT *a, ENTROPY_CONTEXT *l, @@ -390,8 +457,7 @@ static INLINE int cost_coeffs(MACROBLOCK *mb, int pt; const int eob = xd->eobs[ib]; int c = 0; - int cost = 0, seg_eob; - const int segment_id = mbmi->segment_id; + int cost = 0; const int *scan; const int16_t *qcoeff_ptr = xd->qcoeff + ib * 16; const int ref = mbmi->ref_frame != INTRA_FRAME; @@ -406,12 +472,32 @@ static INLINE int cost_coeffs(MACROBLOCK *mb, ENTROPY_CONTEXT *const l1 = l + sizeof(ENTROPY_CONTEXT_PLANES)/sizeof(ENTROPY_CONTEXT); +#if CONFIG_CODE_NONZEROCOUNT + int nzc_context = vp9_get_nzc_context(cm, xd, ib); + unsigned int *nzc_cost; +#else + int seg_eob; + const int segment_id = xd->mode_info_context->mbmi.segment_id; +#endif + + // Check for consistency of tx_size with mode info + if (type == PLANE_TYPE_Y_WITH_DC) { + assert(xd->mode_info_context->mbmi.txfm_size == tx_size); + } else { + TX_SIZE tx_size_uv = get_uv_tx_size(xd); + assert(tx_size == tx_size_uv); + } + switch (tx_size) { case TX_4X4: a_ec = *a; l_ec = *l; scan = vp9_default_zig_zag1d_4x4; +#if CONFIG_CODE_NONZEROCOUNT + nzc_cost = mb->nzc_costs_4x4[nzc_context][ref][type]; +#else seg_eob = 16; +#endif if (type == PLANE_TYPE_Y_WITH_DC) { if (tx_type == ADST_DCT) { scan = vp9_row_scan_4x4; @@ -424,11 +510,19 @@ static INLINE int cost_coeffs(MACROBLOCK *mb, a_ec = (a[0] + a[1]) != 0; l_ec = (l[0] + l[1]) != 0; scan = 
vp9_default_zig_zag1d_8x8; +#if CONFIG_CODE_NONZEROCOUNT + nzc_cost = mb->nzc_costs_8x8[nzc_context][ref][type]; +#else seg_eob = 64; +#endif break; case TX_16X16: scan = vp9_default_zig_zag1d_16x16; +#if CONFIG_CODE_NONZEROCOUNT + nzc_cost = mb->nzc_costs_16x16[nzc_context][ref][type]; +#else seg_eob = 256; +#endif if (type == PLANE_TYPE_UV) { a_ec = (a[0] + a[1] + a1[0] + a1[1]) != 0; l_ec = (l[0] + l[1] + l1[0] + l1[1]) != 0; @@ -439,7 +533,11 @@ static INLINE int cost_coeffs(MACROBLOCK *mb, break; case TX_32X32: scan = vp9_default_zig_zag1d_32x32; +#if CONFIG_CODE_NONZEROCOUNT + nzc_cost = mb->nzc_costs_32x32[nzc_context][ref][type]; +#else seg_eob = 1024; +#endif if (type == PLANE_TYPE_UV) { ENTROPY_CONTEXT *a2, *a3, *l2, *l3; a2 = a1 + sizeof(ENTROPY_CONTEXT_PLANES) / sizeof(ENTROPY_CONTEXT); @@ -464,21 +562,33 @@ static INLINE int cost_coeffs(MACROBLOCK *mb, VP9_COMBINEENTROPYCONTEXTS(pt, a_ec, l_ec); +#if CONFIG_CODE_NONZEROCOUNT == 0 if (vp9_segfeature_active(xd, segment_id, SEG_LVL_SKIP)) seg_eob = 0; +#endif { int recent_energy = 0; +#if CONFIG_CODE_NONZEROCOUNT + int nzc = 0; +#endif for (; c < eob; c++) { int v = qcoeff_ptr[scan[c]]; int t = vp9_dct_value_tokens_ptr[v].Token; +#if CONFIG_CODE_NONZEROCOUNT + nzc += (v != 0); +#endif cost += token_costs[get_coef_band(tx_size, c)][pt][t]; cost += vp9_dct_value_cost_ptr[v]; pt = vp9_get_coef_context(&recent_energy, t); } +#if CONFIG_CODE_NONZEROCOUNT + cost += nzc_cost[nzc]; +#else if (c < seg_eob) cost += mb->token_costs[tx_size][type][ref][get_coef_band(tx_size, c)] [pt][DCT_EOB_TOKEN]; +#endif } // is eob first coefficient; @@ -501,7 +611,7 @@ static INLINE int cost_coeffs(MACROBLOCK *mb, return cost; } -static int rdcost_mby_4x4(MACROBLOCK *mb, int backup) { +static int rdcost_mby_4x4(VP9_COMMON *const cm, MACROBLOCK *mb, int backup) { int cost = 0; int b; MACROBLOCKD *xd = &mb->e_mbd; @@ -521,7 +631,7 @@ static int rdcost_mby_4x4(MACROBLOCK *mb, int backup) { } for (b = 0; b < 16; b++) - cost += 
cost_coeffs(mb, b, PLANE_TYPE_Y_WITH_DC, + cost += cost_coeffs(cm, mb, b, PLANE_TYPE_Y_WITH_DC, ta + vp9_block2above[TX_4X4][b], tl + vp9_block2left[TX_4X4][b], TX_4X4); @@ -529,7 +639,8 @@ static int rdcost_mby_4x4(MACROBLOCK *mb, int backup) { return cost; } -static void macro_block_yrd_4x4(MACROBLOCK *mb, +static void macro_block_yrd_4x4(VP9_COMMON *const cm, + MACROBLOCK *mb, int *Rate, int *Distortion, int *skippable, int backup) { @@ -540,11 +651,11 @@ static void macro_block_yrd_4x4(MACROBLOCK *mb, vp9_quantize_mby_4x4(mb); *Distortion = vp9_mbblock_error(mb) >> 2; - *Rate = rdcost_mby_4x4(mb, backup); + *Rate = rdcost_mby_4x4(cm, mb, backup); *skippable = vp9_mby_is_skippable_4x4(xd); } -static int rdcost_mby_8x8(MACROBLOCK *mb, int backup) { +static int rdcost_mby_8x8(VP9_COMMON *const cm, MACROBLOCK *mb, int backup) { int cost = 0; int b; MACROBLOCKD *xd = &mb->e_mbd; @@ -564,7 +675,7 @@ static int rdcost_mby_8x8(MACROBLOCK *mb, int backup) { } for (b = 0; b < 16; b += 4) - cost += cost_coeffs(mb, b, PLANE_TYPE_Y_WITH_DC, + cost += cost_coeffs(cm, mb, b, PLANE_TYPE_Y_WITH_DC, ta + vp9_block2above[TX_8X8][b], tl + vp9_block2left[TX_8X8][b], TX_8X8); @@ -572,7 +683,8 @@ static int rdcost_mby_8x8(MACROBLOCK *mb, int backup) { return cost; } -static void macro_block_yrd_8x8(MACROBLOCK *mb, +static void macro_block_yrd_8x8(VP9_COMMON *const cm, + MACROBLOCK *mb, int *Rate, int *Distortion, int *skippable, int backup) { @@ -583,11 +695,11 @@ static void macro_block_yrd_8x8(MACROBLOCK *mb, vp9_quantize_mby_8x8(mb); *Distortion = vp9_mbblock_error(mb) >> 2; - *Rate = rdcost_mby_8x8(mb, backup); + *Rate = rdcost_mby_8x8(cm, mb, backup); *skippable = vp9_mby_is_skippable_8x8(xd); } -static int rdcost_mby_16x16(MACROBLOCK *mb, int backup) { +static int rdcost_mby_16x16(VP9_COMMON *const cm, MACROBLOCK *mb, int backup) { int cost; MACROBLOCKD *xd = &mb->e_mbd; ENTROPY_CONTEXT_PLANES t_above, t_left; @@ -604,11 +716,12 @@ static int rdcost_mby_16x16(MACROBLOCK *mb, 
int backup) { tl = (ENTROPY_CONTEXT *)xd->left_context; } - cost = cost_coeffs(mb, 0, PLANE_TYPE_Y_WITH_DC, ta, tl, TX_16X16); + cost = cost_coeffs(cm, mb, 0, PLANE_TYPE_Y_WITH_DC, ta, tl, TX_16X16); return cost; } -static void macro_block_yrd_16x16(MACROBLOCK *mb, int *Rate, int *Distortion, +static void macro_block_yrd_16x16(VP9_COMMON *const cm, MACROBLOCK *mb, + int *Rate, int *Distortion, int *skippable, int backup) { MACROBLOCKD *xd = &mb->e_mbd; @@ -620,10 +733,10 @@ static void macro_block_yrd_16x16(MACROBLOCK *mb, int *Rate, int *Distortion, // optimization in the rate-distortion optimization loop? if (mb->optimize && xd->mode_info_context->mbmi.mode < I8X8_PRED) - vp9_optimize_mby_16x16(mb); + vp9_optimize_mby_16x16(cm, mb); *Distortion = vp9_mbblock_error(mb) >> 2; - *Rate = rdcost_mby_16x16(mb, backup); + *Rate = rdcost_mby_16x16(cm, mb, backup); *skippable = vp9_mby_is_skippable_16x16(xd); } @@ -715,15 +828,16 @@ static void choose_txfm_size_from_rd(VP9_COMP *cpi, MACROBLOCK *x, static void macro_block_yrd(VP9_COMP *cpi, MACROBLOCK *x, int *rate, int *distortion, int *skippable, int64_t txfm_cache[NB_TXFM_MODES]) { + VP9_COMMON *const cm = &cpi->common; MACROBLOCKD *const xd = &x->e_mbd; int r[TX_SIZE_MAX_MB][2], d[TX_SIZE_MAX_MB], s[TX_SIZE_MAX_MB]; vp9_subtract_mby(x->src_diff, *(x->block[0].base_src), xd->predictor, x->block[0].src_stride); - macro_block_yrd_16x16(x, &r[TX_16X16][0], &d[TX_16X16], &s[TX_16X16], 1); - macro_block_yrd_8x8(x, &r[TX_8X8][0], &d[TX_8X8], &s[TX_8X8], 1); - macro_block_yrd_4x4(x, &r[TX_4X4][0], &d[TX_4X4], &s[TX_4X4], 1); + macro_block_yrd_16x16(cm, x, &r[TX_16X16][0], &d[TX_16X16], &s[TX_16X16], 1); + macro_block_yrd_8x8(cm, x, &r[TX_8X8][0], &d[TX_8X8], &s[TX_8X8], 1); + macro_block_yrd_4x4(cm, x, &r[TX_4X4][0], &d[TX_4X4], &s[TX_4X4], 1); choose_txfm_size_from_rd(cpi, x, r, rate, d, distortion, s, skippable, txfm_cache, TX_16X16); @@ -738,8 +852,8 @@ static void copy_predictor(uint8_t *dst, const uint8_t *predictor) { 
d[12] = p[12]; } -static int rdcost_sby_32x32(MACROBLOCK *x, int backup) { - MACROBLOCKD * const xd = &x->e_mbd; +static int rdcost_sby_32x32(VP9_COMMON *const cm, MACROBLOCK *x, int backup) { + MACROBLOCKD * xd = &x->e_mbd; ENTROPY_CONTEXT_PLANES t_above[2], t_left[2]; ENTROPY_CONTEXT *ta, *tl; @@ -754,7 +868,7 @@ static int rdcost_sby_32x32(MACROBLOCK *x, int backup) { tl = (ENTROPY_CONTEXT *) xd->left_context; } - return cost_coeffs(x, 0, PLANE_TYPE_Y_WITH_DC, ta, tl, TX_32X32); + return cost_coeffs(cm, x, 0, PLANE_TYPE_Y_WITH_DC, ta, tl, TX_32X32); } static int vp9_sb_block_error_c(int16_t *coeff, int16_t *dqcoeff, @@ -771,13 +885,14 @@ static int vp9_sb_block_error_c(int16_t *coeff, int16_t *dqcoeff, } #define DEBUG_ERROR 0 -static void super_block_yrd_32x32(MACROBLOCK *x, +static void super_block_yrd_32x32(VP9_COMMON *const cm, MACROBLOCK *x, int *rate, int *distortion, int *skippable, int backup) { MACROBLOCKD *const xd = &x->e_mbd; #if DEBUG_ERROR int16_t out[1024]; #endif + xd->mode_info_context->mbmi.txfm_size = TX_32X32; vp9_transform_sby_32x32(x); vp9_quantize_sby_32x32(x); @@ -791,7 +906,7 @@ static void super_block_yrd_32x32(MACROBLOCK *x, printf("IDCT/FDCT error 32x32: %d (d: %d)\n", vp9_block_error_c(x->src_diff, out, 1024), *distortion); #endif - *rate = rdcost_sby_32x32(x, backup); + *rate = rdcost_sby_32x32(cm, x, backup); *skippable = vp9_sby_is_skippable_32x32(xd); } @@ -818,7 +933,8 @@ static void super_block_yrd(VP9_COMP *cpi, vp9_subtract_sby_s_c(x->src_diff, src, src_y_stride, dst, dst_y_stride); - super_block_yrd_32x32(x, &r[TX_32X32][0], &d[TX_32X32], &s[TX_32X32], 1); + super_block_yrd_32x32(&cpi->common, x, + &r[TX_32X32][0], &d[TX_32X32], &s[TX_32X32], 1); #if DEBUG_ERROR int err[3] = { 0, 0, 0 }; @@ -835,7 +951,7 @@ static void super_block_yrd(VP9_COMP *cpi, xd->above_context = &t_above[TX_16X16][x_idx]; xd->left_context = &t_left[TX_16X16][y_idx]; - macro_block_yrd_16x16(x, &r_tmp, &d_tmp, &s_tmp, 0); + 
macro_block_yrd_16x16(&cpi->common, x, &r_tmp, &d_tmp, &s_tmp, 0); d[TX_16X16] += d_tmp; r[TX_16X16][0] += r_tmp; s[TX_16X16] = s[TX_16X16] && s_tmp; @@ -846,7 +962,7 @@ static void super_block_yrd(VP9_COMP *cpi, xd->above_context = &t_above[TX_4X4][x_idx]; xd->left_context = &t_left[TX_4X4][y_idx]; - macro_block_yrd_4x4(x, &r_tmp, &d_tmp, &s_tmp, 0); + macro_block_yrd_4x4(&cpi->common, x, &r_tmp, &d_tmp, &s_tmp, 0); d[TX_4X4] += d_tmp; r[TX_4X4][0] += r_tmp; s[TX_4X4] = s[TX_4X4] && s_tmp; @@ -857,7 +973,7 @@ static void super_block_yrd(VP9_COMP *cpi, xd->above_context = &t_above[TX_8X8][x_idx]; xd->left_context = &t_left[TX_8X8][y_idx]; - macro_block_yrd_8x8(x, &r_tmp, &d_tmp, &s_tmp, 0); + macro_block_yrd_8x8(&cpi->common, x, &r_tmp, &d_tmp, &s_tmp, 0); d[TX_8X8] += d_tmp; r[TX_8X8][0] += r_tmp; s[TX_8X8] = s[TX_8X8] && s_tmp; @@ -910,7 +1026,7 @@ static void super_block_64_yrd(VP9_COMP *cpi, src_y_stride, dst + 32 * x_idx + 32 * y_idx * dst_y_stride, dst_y_stride); - super_block_yrd_32x32(x, &r_tmp, &d_tmp, &s_tmp, 0); + super_block_yrd_32x32(&cpi->common, x, &r_tmp, &d_tmp, &s_tmp, 0); r[TX_32X32][0] += r_tmp; d[TX_32X32] += d_tmp; s[TX_32X32] = s[TX_32X32] && s_tmp; @@ -931,7 +1047,7 @@ static void super_block_64_yrd(VP9_COMP *cpi, xd->above_context = &t_above[TX_16X16][x_idx]; xd->left_context = &t_left[TX_16X16][y_idx]; - macro_block_yrd_16x16(x, &r_tmp, &d_tmp, &s_tmp, 0); + macro_block_yrd_16x16(&cpi->common, x, &r_tmp, &d_tmp, &s_tmp, 0); d[TX_16X16] += d_tmp; r[TX_16X16][0] += r_tmp; s[TX_16X16] = s[TX_16X16] && s_tmp; @@ -942,7 +1058,7 @@ static void super_block_64_yrd(VP9_COMP *cpi, xd->above_context = &t_above[TX_4X4][x_idx]; xd->left_context = &t_left[TX_4X4][y_idx]; - macro_block_yrd_4x4(x, &r_tmp, &d_tmp, &s_tmp, 0); + macro_block_yrd_4x4(&cpi->common, x, &r_tmp, &d_tmp, &s_tmp, 0); d[TX_4X4] += d_tmp; r[TX_4X4][0] += r_tmp; s[TX_4X4] = s[TX_4X4] && s_tmp; @@ -953,7 +1069,7 @@ static void super_block_64_yrd(VP9_COMP *cpi, xd->above_context = 
&t_above[TX_8X8][x_idx]; xd->left_context = &t_left[TX_8X8][y_idx]; - macro_block_yrd_8x8(x, &r_tmp, &d_tmp, &s_tmp, 0); + macro_block_yrd_8x8(&cpi->common, x, &r_tmp, &d_tmp, &s_tmp, 0); d[TX_8X8] += d_tmp; r[TX_8X8][0] += r_tmp; s[TX_8X8] = s[TX_8X8] && s_tmp; @@ -1006,6 +1122,7 @@ static int64_t rd_pick_intra4x4block(VP9_COMP *cpi, MACROBLOCK *x, BLOCK *be, int64_t best_rd = INT64_MAX; int rate = 0; int distortion; + VP9_COMMON *const cm = &cpi->common; ENTROPY_CONTEXT ta = *a, tempa = *a; ENTROPY_CONTEXT tl = *l, templ = *l; @@ -1022,6 +1139,7 @@ static int64_t rd_pick_intra4x4block(VP9_COMP *cpi, MACROBLOCK *x, BLOCK *be, #if CONFIG_NEWBINTRAMODES b->bmi.as_mode.context = vp9_find_bpred_context(b); #endif + xd->mode_info_context->mbmi.txfm_size = TX_4X4; for (mode = B_DC_PRED; mode < LEFT4X4; mode++) { int64_t this_rd; int ratey; @@ -1060,7 +1178,7 @@ static int64_t rd_pick_intra4x4block(VP9_COMP *cpi, MACROBLOCK *x, BLOCK *be, tempa = ta; templ = tl; - ratey = cost_coeffs(x, b - xd->block, + ratey = cost_coeffs(cm, x, b - xd->block, PLANE_TYPE_Y_WITH_DC, &tempa, &templ, TX_4X4); rate += ratey; distortion = vp9_block_error(be->coeff, b->dqcoeff, 16) >> 2; @@ -1311,6 +1429,7 @@ static int64_t rd_pick_intra8x8block(VP9_COMP *cpi, MACROBLOCK *x, int ib, ENTROPY_CONTEXT *a, ENTROPY_CONTEXT *l, int *bestrate, int *bestratey, int *bestdistortion) { + VP9_COMMON *const cm = &cpi->common; MB_PREDICTION_MODE mode; MACROBLOCKD *xd = &x->e_mbd; int64_t best_rd = INT64_MAX; @@ -1365,7 +1484,7 @@ static int64_t rd_pick_intra8x8block(VP9_COMP *cpi, MACROBLOCK *x, int ib, ta1 = ta0 + 1; tl1 = tl0 + 1; - rate_t = cost_coeffs(x, idx, PLANE_TYPE_Y_WITH_DC, + rate_t = cost_coeffs(cm, x, idx, PLANE_TYPE_Y_WITH_DC, ta0, tl0, TX_8X8); rate += rate_t; @@ -1398,12 +1517,12 @@ static int64_t rd_pick_intra8x8block(VP9_COMP *cpi, MACROBLOCK *x, int ib, x->quantize_b_4x4(x, ib + iblock[i]); } distortion += vp9_block_error_c(be->coeff, b->dqcoeff, 16 << do_two); - rate_t += cost_coeffs(x, 
ib + iblock[i], PLANE_TYPE_Y_WITH_DC, + rate_t += cost_coeffs(cm, x, ib + iblock[i], PLANE_TYPE_Y_WITH_DC, i&1 ? ta1 : ta0, i&2 ? tl1 : tl0, TX_4X4); if (do_two) { i++; - rate_t += cost_coeffs(x, ib + iblock[i], PLANE_TYPE_Y_WITH_DC, + rate_t += cost_coeffs(cm, x, ib + iblock[i], PLANE_TYPE_Y_WITH_DC, i&1 ? ta1 : ta0, i&2 ? tl1 : tl0, TX_4X4); } @@ -1491,7 +1610,7 @@ static int64_t rd_pick_intra8x8mby_modes(VP9_COMP *cpi, MACROBLOCK *mb, return RDCOST(mb->rdmult, mb->rddiv, cost, distortion); } -static int rd_cost_mbuv_4x4(MACROBLOCK *mb, int backup) { +static int rd_cost_mbuv_4x4(VP9_COMMON *const cm, MACROBLOCK *mb, int backup) { int b; int cost = 0; MACROBLOCKD *xd = &mb->e_mbd; @@ -1510,7 +1629,7 @@ static int rd_cost_mbuv_4x4(MACROBLOCK *mb, int backup) { } for (b = 16; b < 24; b++) - cost += cost_coeffs(mb, b, PLANE_TYPE_UV, + cost += cost_coeffs(cm, mb, b, PLANE_TYPE_UV, ta + vp9_block2above[TX_4X4][b], tl + vp9_block2left[TX_4X4][b], TX_4X4); @@ -1525,14 +1644,14 @@ static int64_t rd_inter16x16_uv_4x4(VP9_COMP *cpi, MACROBLOCK *x, int *rate, vp9_transform_mbuv_4x4(x); vp9_quantize_mbuv_4x4(x); - *rate = rd_cost_mbuv_4x4(x, do_ctx_backup); + *rate = rd_cost_mbuv_4x4(&cpi->common, x, do_ctx_backup); *distortion = vp9_mbuverror(x) / 4; *skip = vp9_mbuv_is_skippable_4x4(&x->e_mbd); return RDCOST(x->rdmult, x->rddiv, *rate, *distortion); } -static int rd_cost_mbuv_8x8(MACROBLOCK *mb, int backup) { +static int rd_cost_mbuv_8x8(VP9_COMMON *const cm, MACROBLOCK *mb, int backup) { int b; int cost = 0; MACROBLOCKD *xd = &mb->e_mbd; @@ -1551,7 +1670,7 @@ static int rd_cost_mbuv_8x8(MACROBLOCK *mb, int backup) { } for (b = 16; b < 24; b += 4) - cost += cost_coeffs(mb, b, PLANE_TYPE_UV, + cost += cost_coeffs(cm, mb, b, PLANE_TYPE_UV, ta + vp9_block2above[TX_8X8][b], tl + vp9_block2left[TX_8X8][b], TX_8X8); @@ -1564,14 +1683,14 @@ static int64_t rd_inter16x16_uv_8x8(VP9_COMP *cpi, MACROBLOCK *x, int *rate, vp9_transform_mbuv_8x8(x); vp9_quantize_mbuv_8x8(x); - *rate = 
rd_cost_mbuv_8x8(x, do_ctx_backup); + *rate = rd_cost_mbuv_8x8(&cpi->common, x, do_ctx_backup); *distortion = vp9_mbuverror(x) / 4; *skip = vp9_mbuv_is_skippable_8x8(&x->e_mbd); return RDCOST(x->rdmult, x->rddiv, *rate, *distortion); } -static int rd_cost_sbuv_16x16(MACROBLOCK *x, int backup) { +static int rd_cost_sbuv_16x16(VP9_COMMON *const cm, MACROBLOCK *x, int backup) { int b; int cost = 0; MACROBLOCKD *const xd = &x->e_mbd; @@ -1590,22 +1709,22 @@ static int rd_cost_sbuv_16x16(MACROBLOCK *x, int backup) { } for (b = 16; b < 24; b += 4) - cost += cost_coeffs(x, b * 4, PLANE_TYPE_UV, + cost += cost_coeffs(cm, x, b * 4, PLANE_TYPE_UV, ta + vp9_block2above[TX_8X8][b], tl + vp9_block2left[TX_8X8][b], TX_16X16); return cost; } -static void rd_inter32x32_uv_16x16(MACROBLOCK *x, int *rate, - int *distortion, int *skip, +static void rd_inter32x32_uv_16x16(VP9_COMMON *const cm, MACROBLOCK *x, + int *rate, int *distortion, int *skip, int backup) { MACROBLOCKD *const xd = &x->e_mbd; vp9_transform_sbuv_16x16(x); vp9_quantize_sbuv_16x16(x); - *rate = rd_cost_sbuv_16x16(x, backup); + *rate = rd_cost_sbuv_16x16(cm, x, backup); *distortion = vp9_block_error_c(x->coeff + 1024, xd->dqcoeff + 1024, 512) >> 2; *skip = vp9_sbuv_is_skippable_16x16(xd); @@ -1623,7 +1742,7 @@ static int64_t rd_inter32x32_uv(VP9_COMP *cpi, MACROBLOCK *x, int *rate, vp9_subtract_sbuv_s_c(x->src_diff, usrc, vsrc, src_uv_stride, udst, vdst, dst_uv_stride); - rd_inter32x32_uv_16x16(x, rate, distortion, skip, 1); + rd_inter32x32_uv_16x16(&cpi->common, x, rate, distortion, skip, 1); } else { int n, r = 0, d = 0; int skippable = 1; @@ -1671,23 +1790,14 @@ static int64_t rd_inter32x32_uv(VP9_COMP *cpi, MACROBLOCK *x, int *rate, return RDCOST(x->rdmult, x->rddiv, *rate, *distortion); } -static void super_block_64_uvrd(MACROBLOCK *x, int *rate, +static void super_block_64_uvrd(VP9_COMMON *const cm, MACROBLOCK *x, int *rate, int *distortion, int *skip); static int64_t rd_inter64x64_uv(VP9_COMP *cpi, MACROBLOCK 
*x, int *rate, int *distortion, int fullpixel, int *skip) { - super_block_64_uvrd(x, rate, distortion, skip); + super_block_64_uvrd(&cpi->common, x, rate, distortion, skip); return RDCOST(x->rdmult, x->rddiv, *rate, *distortion); } -static int64_t rd_inter4x4_uv(VP9_COMP *cpi, MACROBLOCK *x, int *rate, - int *distortion, int *skip, int fullpixel, - int mb_row, int mb_col) { - vp9_build_inter4x4_predictors_mbuv(&x->e_mbd, mb_row, mb_col); - vp9_subtract_mbuv(x->src_diff, x->src.u_buffer, x->src.v_buffer, - x->e_mbd.predictor, x->src.uv_stride); - return rd_inter16x16_uv_4x4(cpi, x, rate, distortion, fullpixel, skip, 1); -} - static void rd_pick_intra_mbuv_mode(VP9_COMP *cpi, MACROBLOCK *x, int *rate, @@ -1702,6 +1812,7 @@ static void rd_pick_intra_mbuv_mode(VP9_COMP *cpi, int UNINITIALIZED_IS_SAFE(d), UNINITIALIZED_IS_SAFE(r); int rate_to, UNINITIALIZED_IS_SAFE(skip); + xd->mode_info_context->mbmi.txfm_size = TX_4X4; for (mode = DC_PRED; mode <= TM_PRED; mode++) { int rate; int distortion; @@ -1715,7 +1826,7 @@ static void rd_pick_intra_mbuv_mode(VP9_COMP *cpi, vp9_transform_mbuv_4x4(x); vp9_quantize_mbuv_4x4(x); - rate_to = rd_cost_mbuv_4x4(x, 1); + rate_to = rd_cost_mbuv_4x4(&cpi->common, x, 1); rate = rate_to + x->intra_uv_mode_cost[x->e_mbd.frame_type][mbmi->uv_mode]; @@ -1754,6 +1865,7 @@ static void rd_pick_intra_mbuv_mode_8x8(VP9_COMP *cpi, int UNINITIALIZED_IS_SAFE(d), UNINITIALIZED_IS_SAFE(r); int rate_to, UNINITIALIZED_IS_SAFE(skip); + xd->mode_info_context->mbmi.txfm_size = TX_8X8; for (mode = DC_PRED; mode <= TM_PRED; mode++) { int rate; int distortion; @@ -1767,7 +1879,7 @@ static void rd_pick_intra_mbuv_mode_8x8(VP9_COMP *cpi, vp9_quantize_mbuv_8x8(x); - rate_to = rd_cost_mbuv_8x8(x, 1); + rate_to = rd_cost_mbuv_8x8(&cpi->common, x, 1); rate = rate_to + x->intra_uv_mode_cost[x->e_mbd.frame_type][mbmi->uv_mode]; distortion = vp9_mbuverror(x) / 4; @@ -1789,7 +1901,8 @@ static void rd_pick_intra_mbuv_mode_8x8(VP9_COMP *cpi, } // TODO(rbultje) very similar 
to rd_inter32x32_uv(), merge? -static void super_block_uvrd(MACROBLOCK *x, +static void super_block_uvrd(VP9_COMMON *const cm, + MACROBLOCK *x, int *rate, int *distortion, int *skippable) { @@ -1803,7 +1916,7 @@ static void super_block_uvrd(MACROBLOCK *x, vp9_subtract_sbuv_s_c(x->src_diff, usrc, vsrc, src_uv_stride, udst, vdst, dst_uv_stride); - rd_inter32x32_uv_16x16(x, rate, distortion, skippable, 1); + rd_inter32x32_uv_16x16(cm, x, rate, distortion, skippable, 1); } else { int d = 0, r = 0, n, s = 1; ENTROPY_CONTEXT_PLANES t_above[2], t_left[2]; @@ -1837,9 +1950,9 @@ static void super_block_uvrd(MACROBLOCK *x, xd->above_context = t_above + x_idx; xd->left_context = t_left + y_idx; if (mbmi->txfm_size == TX_4X4) { - r += rd_cost_mbuv_4x4(x, 0); + r += rd_cost_mbuv_4x4(cm, x, 0); } else { - r += rd_cost_mbuv_8x8(x, 0); + r += rd_cost_mbuv_8x8(cm, x, 0); } } @@ -1852,7 +1965,8 @@ static void super_block_uvrd(MACROBLOCK *x, } } -static int rd_cost_sb64uv_32x32(MACROBLOCK *x, int backup) { +static int rd_cost_sb64uv_32x32(VP9_COMMON *const cm, MACROBLOCK *x, + int backup) { int b; int cost = 0; MACROBLOCKD *const xd = &x->e_mbd; @@ -1871,28 +1985,28 @@ static int rd_cost_sb64uv_32x32(MACROBLOCK *x, int backup) { } for (b = 16; b < 24; b += 4) - cost += cost_coeffs(x, b * 16, PLANE_TYPE_UV, + cost += cost_coeffs(cm, x, b * 16, PLANE_TYPE_UV, ta + vp9_block2above[TX_8X8][b], tl + vp9_block2left[TX_8X8][b], TX_32X32); return cost; } -static void rd_inter64x64_uv_32x32(MACROBLOCK *x, int *rate, - int *distortion, int *skip, +static void rd_inter64x64_uv_32x32(VP9_COMMON *const cm, MACROBLOCK *x, + int *rate, int *distortion, int *skip, int backup) { MACROBLOCKD *const xd = &x->e_mbd; vp9_transform_sb64uv_32x32(x); vp9_quantize_sb64uv_32x32(x); - *rate = rd_cost_sb64uv_32x32(x, backup); + *rate = rd_cost_sb64uv_32x32(cm, x, backup); *distortion = vp9_block_error_c(x->coeff + 4096, xd->dqcoeff + 4096, 2048); *skip = vp9_sb64uv_is_skippable_32x32(xd); } -static void 
super_block_64_uvrd(MACROBLOCK *x, +static void super_block_64_uvrd(VP9_COMMON *const cm, MACROBLOCK *x, int *rate, int *distortion, int *skippable) { @@ -1913,7 +2027,7 @@ static void super_block_64_uvrd(MACROBLOCK *x, if (mbmi->txfm_size == TX_32X32) { vp9_subtract_sb64uv_s_c(x->src_diff, usrc, vsrc, src_uv_stride, udst, vdst, dst_uv_stride); - rd_inter64x64_uv_32x32(x, &r, &d, &s, 1); + rd_inter64x64_uv_32x32(cm, x, &r, &d, &s, 1); } else if (mbmi->txfm_size == TX_16X16) { int n; @@ -1931,7 +2045,7 @@ static void super_block_64_uvrd(MACROBLOCK *x, dst_uv_stride); xd->above_context = t_above + x_idx * 2; xd->left_context = t_left + y_idx * 2; - rd_inter32x32_uv_16x16(x, &r_tmp, &d_tmp, &s_tmp, 0); + rd_inter32x32_uv_16x16(cm, x, &r_tmp, &d_tmp, &s_tmp, 0); r += r_tmp; d += d_tmp; s = s && s_tmp; @@ -1961,9 +2075,9 @@ static void super_block_64_uvrd(MACROBLOCK *x, xd->left_context = t_left + y_idx; d += vp9_mbuverror(x) >> 2; if (mbmi->txfm_size == TX_4X4) { - r += rd_cost_mbuv_4x4(x, 0); + r += rd_cost_mbuv_4x4(cm, x, 0); } else { - r += rd_cost_mbuv_8x8(x, 0); + r += rd_cost_mbuv_8x8(cm, x, 0); } } } @@ -1992,7 +2106,7 @@ static int64_t rd_pick_intra_sbuv_mode(VP9_COMP *cpi, x->e_mbd.mode_info_context->mbmi.uv_mode = mode; vp9_build_intra_predictors_sbuv_s(&x->e_mbd); - super_block_uvrd(x, &this_rate_tokenonly, + super_block_uvrd(&cpi->common, x, &this_rate_tokenonly, &this_distortion, &s); this_rate = this_rate_tokenonly + x->intra_uv_mode_cost[x->e_mbd.frame_type][mode]; @@ -2029,7 +2143,7 @@ static int64_t rd_pick_intra_sb64uv_mode(VP9_COMP *cpi, x->e_mbd.mode_info_context->mbmi.uv_mode = mode; vp9_build_intra_predictors_sb64uv_s(&x->e_mbd); - super_block_64_uvrd(x, &this_rate_tokenonly, + super_block_64_uvrd(&cpi->common, x, &this_rate_tokenonly, &this_distortion, &s); this_rate = this_rate_tokenonly + x->intra_uv_mode_cost[x->e_mbd.frame_type][mode]; @@ -2186,7 +2300,8 @@ static int labels2mode( return cost; } -static int64_t 
encode_inter_mb_segment(MACROBLOCK *x, +static int64_t encode_inter_mb_segment(VP9_COMMON *const cm, + MACROBLOCK *x, int const *labels, int which_label, int *labelyrate, @@ -2225,7 +2340,7 @@ static int64_t encode_inter_mb_segment(MACROBLOCK *x, x->quantize_b_4x4(x, i); thisdistortion = vp9_block_error(be->coeff, bd->dqcoeff, 16); *distortion += thisdistortion; - *labelyrate += cost_coeffs(x, i, PLANE_TYPE_Y_WITH_DC, + *labelyrate += cost_coeffs(cm, x, i, PLANE_TYPE_Y_WITH_DC, ta + vp9_block2above[TX_4X4][i], tl + vp9_block2left[TX_4X4][i], TX_4X4); } @@ -2234,7 +2349,8 @@ static int64_t encode_inter_mb_segment(MACROBLOCK *x, return RDCOST(x->rdmult, x->rddiv, *labelyrate, *distortion); } -static int64_t encode_inter_mb_segment_8x8(MACROBLOCK *x, +static int64_t encode_inter_mb_segment_8x8(VP9_COMMON *const cm, + MACROBLOCK *x, int const *labels, int which_label, int *labelyrate, @@ -2288,10 +2404,12 @@ static int64_t encode_inter_mb_segment_8x8(MACROBLOCK *x, x->quantize_b_8x8(x, idx); thisdistortion = vp9_block_error_c(be2->coeff, bd2->dqcoeff, 64); otherdist += thisdistortion; - othercost += cost_coeffs(x, idx, PLANE_TYPE_Y_WITH_DC, + xd->mode_info_context->mbmi.txfm_size = TX_8X8; + othercost += cost_coeffs(cm, x, idx, PLANE_TYPE_Y_WITH_DC, tacp + vp9_block2above[TX_8X8][idx], tlcp + vp9_block2left[TX_8X8][idx], TX_8X8); + xd->mode_info_context->mbmi.txfm_size = TX_4X4; } for (j = 0; j < 4; j += 2) { bd = &xd->block[ib + iblock[j]]; @@ -2300,15 +2418,17 @@ static int64_t encode_inter_mb_segment_8x8(MACROBLOCK *x, x->quantize_b_4x4_pair(x, ib + iblock[j], ib + iblock[j] + 1); thisdistortion = vp9_block_error_c(be->coeff, bd->dqcoeff, 32); *distortion += thisdistortion; - *labelyrate += cost_coeffs(x, ib + iblock[j], PLANE_TYPE_Y_WITH_DC, - ta + vp9_block2above[TX_4X4][ib + iblock[j]], - tl + vp9_block2left[TX_4X4][ib + iblock[j]], - TX_4X4); - *labelyrate += cost_coeffs(x, ib + iblock[j] + 1, - PLANE_TYPE_Y_WITH_DC, - ta + vp9_block2above[TX_4X4][ib + iblock[j] 
+ 1], - tl + vp9_block2left[TX_4X4][ib + iblock[j]], - TX_4X4); + *labelyrate += + cost_coeffs(cm, x, ib + iblock[j], PLANE_TYPE_Y_WITH_DC, + ta + vp9_block2above[TX_4X4][ib + iblock[j]], + tl + vp9_block2left[TX_4X4][ib + iblock[j]], + TX_4X4); + *labelyrate += + cost_coeffs(cm, x, ib + iblock[j] + 1, + PLANE_TYPE_Y_WITH_DC, + ta + vp9_block2above[TX_4X4][ib + iblock[j] + 1], + tl + vp9_block2left[TX_4X4][ib + iblock[j]], + TX_4X4); } } else /* 8x8 */ { if (otherrd) { @@ -2319,22 +2439,26 @@ static int64_t encode_inter_mb_segment_8x8(MACROBLOCK *x, x->quantize_b_4x4_pair(x, ib + iblock[j], ib + iblock[j]); thisdistortion = vp9_block_error_c(be->coeff, bd->dqcoeff, 32); otherdist += thisdistortion; - othercost += cost_coeffs(x, ib + iblock[j], PLANE_TYPE_Y_WITH_DC, - tacp + vp9_block2above[TX_4X4][ib + iblock[j]], - tlcp + vp9_block2left[TX_4X4][ib + iblock[j]], - TX_4X4); - othercost += cost_coeffs(x, ib + iblock[j] + 1, - PLANE_TYPE_Y_WITH_DC, - tacp + vp9_block2above[TX_4X4][ib + iblock[j] + 1], - tlcp + vp9_block2left[TX_4X4][ib + iblock[j]], - TX_4X4); + xd->mode_info_context->mbmi.txfm_size = TX_4X4; + othercost += + cost_coeffs(cm, x, ib + iblock[j], PLANE_TYPE_Y_WITH_DC, + tacp + vp9_block2above[TX_4X4][ib + iblock[j]], + tlcp + vp9_block2left[TX_4X4][ib + iblock[j]], + TX_4X4); + othercost += + cost_coeffs(cm, x, ib + iblock[j] + 1, + PLANE_TYPE_Y_WITH_DC, + tacp + vp9_block2above[TX_4X4][ib + iblock[j] + 1], + tlcp + vp9_block2left[TX_4X4][ib + iblock[j]], + TX_4X4); + xd->mode_info_context->mbmi.txfm_size = TX_8X8; } } x->fwd_txm8x8(be->src_diff, be2->coeff, 32); x->quantize_b_8x8(x, idx); thisdistortion = vp9_block_error_c(be2->coeff, bd2->dqcoeff, 64); *distortion += thisdistortion; - *labelyrate += cost_coeffs(x, idx, PLANE_TYPE_Y_WITH_DC, + *labelyrate += cost_coeffs(cm, x, idx, PLANE_TYPE_Y_WITH_DC, ta + vp9_block2above[TX_8X8][idx], tl + vp9_block2left[TX_8X8][idx], TX_8X8); } @@ -2574,11 +2698,13 @@ static void rd_check_segment_txsize(VP9_COMP 
*cpi, MACROBLOCK *x, continue; if (segmentation == PARTITIONING_4X4) { - this_rd = encode_inter_mb_segment(x, labels, i, &labelyrate, + this_rd = encode_inter_mb_segment(&cpi->common, + x, labels, i, &labelyrate, &distortion, ta_s, tl_s); other_rd = this_rd; } else { - this_rd = encode_inter_mb_segment_8x8(x, labels, i, &labelyrate, + this_rd = encode_inter_mb_segment_8x8(&cpi->common, + x, labels, i, &labelyrate, &distortion, &other_rd, ta_s, tl_s); } @@ -3146,7 +3272,9 @@ static void inter_mode_cost(VP9_COMP *cpi, MACROBLOCK *x, // UV cost and distortion vp9_subtract_mbuv(x->src_diff, x->src.u_buffer, x->src.v_buffer, x->e_mbd.predictor, x->src.uv_stride); - if (x->e_mbd.mode_info_context->mbmi.txfm_size != TX_4X4) + if (x->e_mbd.mode_info_context->mbmi.txfm_size != TX_4X4 && + x->e_mbd.mode_info_context->mbmi.mode != I8X8_PRED && + x->e_mbd.mode_info_context->mbmi.mode != SPLITMV) rd_inter16x16_uv_8x8(cpi, x, rate_uv, distortion_uv, cpi->common.full_pixel, &uv_skippable, 1); else @@ -3933,7 +4061,10 @@ static void rd_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, #if CONFIG_COMP_INTERINTRA_PRED int is_best_interintra = 0; int64_t best_intra16_rd = INT64_MAX; - int best_intra16_mode = DC_PRED, best_intra16_uv_mode = DC_PRED; + int best_intra16_mode = DC_PRED; +#if SEPARATE_INTERINTRA_UV + int best_intra16_uv_mode = DC_PRED; +#endif #endif int64_t best_overall_rd = INT64_MAX; INTERPOLATIONFILTERTYPE best_filter = SWITCHABLE; @@ -4015,6 +4146,8 @@ static void rd_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, cpi->zbin_mode_boost = 0; vp9_update_zbin_extra(cpi, x); + xd->mode_info_context->mbmi.mode = DC_PRED; + rd_pick_intra_mbuv_mode(cpi, x, &uv_intra_rate, &uv_intra_rate_tokenonly, &uv_intra_distortion, &uv_intra_skippable); @@ -4330,6 +4463,7 @@ static void rd_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, this_rd_thresh = (mbmi->ref_frame == GOLDEN_FRAME) ? 
cpi->rd_threshes[THR_NEWG] : this_rd_thresh; + xd->mode_info_context->mbmi.txfm_size = TX_4X4; for (switchable_filter_index = 0; switchable_filter_index < VP9_SWITCHABLE_FILTERS; @@ -4421,8 +4555,11 @@ static void rd_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, if (tmp_rd < best_yrd) { int uv_skippable; - rd_inter4x4_uv(cpi, x, &rate_uv, &distortion_uv, &uv_skippable, - cpi->common.full_pixel, mb_row, mb_col); + vp9_build_inter4x4_predictors_mbuv(&x->e_mbd, mb_row, mb_col); + vp9_subtract_mbuv(x->src_diff, x->src.u_buffer, x->src.v_buffer, + x->e_mbd.predictor, x->src.uv_stride); + rd_inter16x16_uv_4x4(cpi, x, &rate_uv, &distortion_uv, + cpi->common.full_pixel, &uv_skippable, 1); rate2 += rate_uv; distortion2 += distortion_uv; skippable = skippable && uv_skippable; @@ -4543,8 +4680,10 @@ static void rd_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, (this_rd < best_intra16_rd)) { best_intra16_rd = this_rd; best_intra16_mode = this_mode; +#if SEPARATE_INTERINTRA_UV best_intra16_uv_mode = (mbmi->txfm_size != TX_4X4 ? 
uv_intra_mode_8x8 : uv_intra_mode); +#endif } #endif @@ -4792,6 +4931,7 @@ void vp9_rd_pick_intra_mode_sb32(VP9_COMP *cpi, MACROBLOCK *x, int64_t txfm_cache[NB_TXFM_MODES], err; int i; + xd->mode_info_context->mbmi.mode = DC_PRED; err = rd_pick_intra_sby_mode(cpi, x, &rate_y, &rate_y_tokenonly, &dist_y, &y_skip, txfm_cache); rd_pick_intra_sbuv_mode(cpi, x, &rate_uv, &rate_uv_tokenonly, @@ -4826,6 +4966,7 @@ void vp9_rd_pick_intra_mode_sb64(VP9_COMP *cpi, MACROBLOCK *x, int64_t txfm_cache[NB_TXFM_MODES], err; int i; + xd->mode_info_context->mbmi.mode = DC_PRED; err = rd_pick_intra_sb64y_mode(cpi, x, &rate_y, &rate_y_tokenonly, &dist_y, &y_skip, txfm_cache); rd_pick_intra_sb64uv_mode(cpi, x, &rate_uv, &rate_uv_tokenonly, @@ -4873,6 +5014,7 @@ void vp9_rd_pick_intra_mode(VP9_COMP *cpi, MACROBLOCK *x, int i; mbmi->ref_frame = INTRA_FRAME; + mbmi->mode = DC_PRED; rd_pick_intra_mbuv_mode(cpi, x, &rateuv, &rateuv_tokenonly, &distuv, &uv_intra_skippable); modeuv = mbmi->uv_mode; @@ -5002,7 +5144,10 @@ static int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x, #if CONFIG_COMP_INTERINTRA_PRED int is_best_interintra = 0; int64_t best_intra16_rd = INT64_MAX; - int best_intra16_mode = DC_PRED, best_intra16_uv_mode = DC_PRED; + int best_intra16_mode = DC_PRED; +#if SEPARATE_INTERINTRA_UV + int best_intra16_uv_mode = DC_PRED; +#endif #endif int64_t best_overall_rd = INT64_MAX; INTERPOLATIONFILTERTYPE best_filter = SWITCHABLE; @@ -5334,8 +5479,10 @@ static int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x, (this_rd < best_intra16_rd)) { best_intra16_rd = this_rd; best_intra16_mode = this_mode; +#if SEPARATE_INTERINTRA_UV best_intra16_uv_mode = (mbmi->txfm_size != TX_4X4 ? 
mode_uv_8x8 : mode_uv_4x4); +#endif } #endif diff --git a/vp9/encoder/vp9_segmentation.c b/vp9/encoder/vp9_segmentation.c index cfaf5f592..a04a20c29 100644 --- a/vp9/encoder/vp9_segmentation.c +++ b/vp9/encoder/vp9_segmentation.c @@ -219,10 +219,8 @@ static void count_segs(VP9_COMP *cpi, const int segment_id = mi->mbmi.segment_id; xd->mode_info_context = mi; - xd->mb_to_top_edge = -((mb_row * 16) << 3); - xd->mb_to_left_edge = -((mb_col * 16) << 3); - xd->mb_to_bottom_edge = ((cm->mb_rows - mb_size - mb_row) * 16) << 3; - xd->mb_to_right_edge = ((cm->mb_cols - mb_size - mb_col) * 16) << 3; + set_mb_row(cm, xd, mb_row, mb_size); + set_mb_col(cm, xd, mb_col, mb_size); // Count the number of hits on each segment with no prediction no_pred_segcounts[segment_id]++; diff --git a/vp9/encoder/vp9_tokenize.c b/vp9/encoder/vp9_tokenize.c index d115fe80e..0fad9b032 100644 --- a/vp9/encoder/vp9_tokenize.c +++ b/vp9/encoder/vp9_tokenize.c @@ -119,6 +119,11 @@ static void tokenize_b(VP9_COMP *cpi, get_tx_type(xd, &xd->block[ib]) : DCT_DCT; const int ref = mbmi->ref_frame != INTRA_FRAME; ENTROPY_CONTEXT *a, *l, *a1, *l1, *a2, *l2, *a3, *l3, a_ec, l_ec; +#if CONFIG_CODE_NONZEROCOUNT + int zerosleft, nzc = 0; + if (eob == 0) + assert(xd->nzcs[ib] == 0); +#endif if (sb_type == BLOCK_SIZE_SB64X64) { a = (ENTROPY_CONTEXT *)xd->above_context + @@ -207,29 +212,47 @@ static void tokenize_b(VP9_COMP *cpi, do { const int band = get_coef_band(tx_size, c); int token; - + int v = 0; +#if CONFIG_CODE_NONZEROCOUNT + zerosleft = seg_eob - xd->nzcs[ib] - c + nzc; +#endif if (c < eob) { const int rc = scan[c]; - const int v = qcoeff_ptr[rc]; + v = qcoeff_ptr[rc]; assert(-DCT_MAX_VALUE <= v && v < DCT_MAX_VALUE); t->Extra = vp9_dct_value_tokens_ptr[v].Extra; token = vp9_dct_value_tokens_ptr[v].Token; } else { +#if CONFIG_CODE_NONZEROCOUNT + break; +#else token = DCT_EOB_TOKEN; +#endif } t->Token = token; t->context_tree = probs[type][ref][band][pt]; +#if CONFIG_CODE_NONZEROCOUNT + // Skip zero node 
if there are no zeros left + t->skip_eob_node = 1 + (zerosleft == 0); +#else t->skip_eob_node = (pt == 0) && (band > 0); +#endif assert(vp9_coef_encodings[t->Token].Len - t->skip_eob_node > 0); if (!dry_run) { ++counts[type][ref][band][pt][token]; } +#if CONFIG_CODE_NONZEROCOUNT + nzc += (v != 0); +#endif pt = vp9_get_coef_context(&recent_energy, token); ++t; } while (c < eob && ++c < seg_eob); +#if CONFIG_CODE_NONZEROCOUNT + assert(nzc == xd->nzcs[ib]); +#endif *tp = t; a_ec = l_ec = (c > 0); /* 0 <-> all coeff data is zero */ @@ -903,13 +926,15 @@ static void stuff_b(VP9_COMP *cpi, PLANE_TYPE type, TX_SIZE tx_size, int dry_run) { + MB_MODE_INFO *mbmi = &xd->mode_info_context->mbmi; + const BLOCK_SIZE_TYPE sb_type = mbmi->sb_type; +#if CONFIG_CODE_NONZEROCOUNT == 0 vp9_coeff_count *counts; vp9_coeff_probs *probs; int pt, band; TOKENEXTRA *t = *tp; - MB_MODE_INFO *mbmi = &xd->mode_info_context->mbmi; const int ref = mbmi->ref_frame != INTRA_FRAME; - const BLOCK_SIZE_TYPE sb_type = mbmi->sb_type; +#endif ENTROPY_CONTEXT *a, *l, *a1, *l1, *a2, *l2, *a3, *l3, a_ec, l_ec; if (sb_type == BLOCK_SIZE_SB32X32) { @@ -939,14 +964,18 @@ static void stuff_b(VP9_COMP *cpi, case TX_4X4: a_ec = a[0]; l_ec = l[0]; +#if CONFIG_CODE_NONZEROCOUNT == 0 counts = cpi->coef_counts_4x4; probs = cpi->common.fc.coef_probs_4x4; +#endif break; case TX_8X8: a_ec = (a[0] + a[1]) != 0; l_ec = (l[0] + l[1]) != 0; +#if CONFIG_CODE_NONZEROCOUNT == 0 counts = cpi->coef_counts_8x8; probs = cpi->common.fc.coef_probs_8x8; +#endif break; case TX_16X16: if (type != PLANE_TYPE_UV) { @@ -956,8 +985,10 @@ static void stuff_b(VP9_COMP *cpi, a_ec = (a[0] + a[1] + a1[0] + a1[1]) != 0; l_ec = (l[0] + l[1] + l1[0] + l1[1]) != 0; } +#if CONFIG_CODE_NONZEROCOUNT == 0 counts = cpi->coef_counts_16x16; probs = cpi->common.fc.coef_probs_16x16; +#endif break; case TX_32X32: if (type != PLANE_TYPE_UV) { @@ -971,19 +1002,25 @@ static void stuff_b(VP9_COMP *cpi, l_ec = (l[0] + l[1] + l1[0] + l1[1] + l2[0] + l2[1] + l3[0] 
+ l3[1]) != 0; } +#if CONFIG_CODE_NONZEROCOUNT == 0 counts = cpi->coef_counts_32x32; probs = cpi->common.fc.coef_probs_32x32; +#endif break; } +#if CONFIG_CODE_NONZEROCOUNT == 0 VP9_COMBINEENTROPYCONTEXTS(pt, a_ec, l_ec); - band = get_coef_band(tx_size, 0); t->Token = DCT_EOB_TOKEN; t->context_tree = probs[type][ref][band][pt]; t->skip_eob_node = 0; ++t; *tp = t; + if (!dry_run) { + ++counts[type][ref][band][pt][DCT_EOB_TOKEN]; + } +#endif *a = *l = 0; if (tx_size == TX_8X8) { a[1] = 0; @@ -1009,10 +1046,6 @@ static void stuff_b(VP9_COMP *cpi, l2[0] = l2[1] = l3[0] = l3[1] = l_ec; } } - - if (!dry_run) { - ++counts[type][ref][band][pt][DCT_EOB_TOKEN]; - } } static void stuff_mb_8x8(VP9_COMP *cpi, MACROBLOCKD *xd, diff --git a/vp9/encoder/vp9_treewriter.c b/vp9/encoder/vp9_treewriter.c index 8e252813c..951ffa798 100644 --- a/vp9/encoder/vp9_treewriter.c +++ b/vp9/encoder/vp9_treewriter.c @@ -35,5 +35,6 @@ void vp9_cost_tokens(int *c, const vp9_prob *p, vp9_tree t) { } void vp9_cost_tokens_skip(int *c, const vp9_prob *p, vp9_tree t) { + c[0] = 0; cost(c, t, p, 2, 0); }