diff --git a/vp9/encoder/vp9_encodemb.c b/vp9/encoder/vp9_encodemb.c index cf92dbafb..0940d9a61 100644 --- a/vp9/encoder/vp9_encodemb.c +++ b/vp9/encoder/vp9_encodemb.c @@ -80,6 +80,18 @@ static int trellis_get_coeff_context(const int16_t *scan, const int16_t *nb, return pt; } +static const int16_t band_count_table[TX_SIZES][8] = { + { 1, 2, 3, 4, 3, 16 - 13, 0 }, + { 1, 2, 3, 4, 11, 64 - 21, 0 }, + { 1, 2, 3, 4, 11, 256 - 21, 0 }, + { 1, 2, 3, 4, 11, 1024 - 21, 0 }, +}; +static const int16_t band_cum_count_table[TX_SIZES][8] = { + { 0, 1, 3, 6, 10, 13, 16, 0 }, + { 0, 1, 3, 6, 10, 21, 64, 0 }, + { 0, 1, 3, 6, 10, 21, 256, 0 }, + { 0, 1, 3, 6, 10, 21, 1024, 0 }, +}; int vp9_optimize_b(MACROBLOCK *mb, int plane, int block, TX_SIZE tx_size, int ctx) { MACROBLOCKD *const xd = &mb->e_mbd; @@ -108,14 +120,20 @@ int vp9_optimize_b(MACROBLOCK *mb, int plane, int block, TX_SIZE tx_size, int rate0, rate1; int64_t error0, error1; int16_t t0, t1; - unsigned int(*const token_costs)[2][COEFF_CONTEXTS][ENTROPY_TOKENS] = - mb->token_costs[tx_size][type][ref]; - int best, band, pt, i, final_eob; + int best, band = (eob < default_eob) ? band_translate[eob] + : band_translate[eob - 1]; + int pt, i, final_eob; #if CONFIG_VP9_HIGHBITDEPTH const uint16_t *cat6_high_cost = vp9_get_high_cost_table(xd->bd); #else const uint16_t *cat6_high_cost = vp9_get_high_cost_table(8); #endif + unsigned int(*token_costs)[2][COEFF_CONTEXTS][ENTROPY_TOKENS] = + mb->token_costs[tx_size][type][ref]; + const int16_t *band_counts = &band_count_table[tx_size][band]; + int16_t band_left = eob - band_cum_count_table[tx_size][band] + 1; + + token_costs += band; assert((!type && !plane) || (type && plane)); assert(eob <= default_eob); @@ -129,8 +147,10 @@ int vp9_optimize_b(MACROBLOCK *mb, int plane, int block, TX_SIZE tx_size, tokens[eob][0].qc = 0; tokens[eob][1] = tokens[eob][0]; - for (i = 0; i < eob; i++) - token_cache[scan[i]] = vp9_pt_energy_class[vp9_get_token(qcoeff[scan[i]])]; + for (i = 0; i < eob; i++) { + const int rc = scan[i]; + token_cache[rc] = vp9_pt_energy_class[vp9_get_token(qcoeff[rc])]; + } for (i = eob; i-- > 0;) { int base_bits, d2, dx; @@ -143,13 +163,12 @@ int vp9_optimize_b(MACROBLOCK *mb, int plane, int block, TX_SIZE tx_size, /* Evaluate the first possibility for this state. */ rate0 = tokens[next][0].rate; rate1 = tokens[next][1].rate; - vp9_get_token_extracost(cat6_high_cost, x, &t0, &base_bits); + base_bits = vp9_get_token_cost(x, &t0, cat6_high_cost); /* Consider both possible successor states. */ if (next < default_eob) { - band = band_translate[i + 1]; pt = trellis_get_coeff_context(scan, nb, i, t0, token_cache); - rate0 += token_costs[band][0][pt][tokens[next][0].token]; - rate1 += token_costs[band][0][pt][tokens[next][1].token]; + rate0 += (*token_costs)[0][pt][tokens[next][0].token]; + rate1 += (*token_costs)[0][pt][tokens[next][1].token]; } UPDATE_RD_COST(); /* And pick the best. */ @@ -181,6 +200,12 @@ int vp9_optimize_b(MACROBLOCK *mb, int plane, int block, TX_SIZE tx_size, } else { tokens[i][1] = tokens[i][0]; next = i; + + if (!(--band_left)) { + --band_counts; + band_left = *band_counts; + --token_costs; + } continue; } @@ -193,18 +218,17 @@ int vp9_optimize_b(MACROBLOCK *mb, int plane, int block, TX_SIZE tx_size, t1 = tokens[next][1].token == EOB_TOKEN ? EOB_TOKEN : ZERO_TOKEN; base_bits = 0; } else { - vp9_get_token_extracost(cat6_high_cost, x, &t0, &base_bits); + base_bits = vp9_get_token_cost(x, &t0, cat6_high_cost); t1 = t0; } if (next < default_eob) { - band = band_translate[i + 1]; if (t0 != EOB_TOKEN) { pt = trellis_get_coeff_context(scan, nb, i, t0, token_cache); - rate0 += token_costs[band][!x][pt][tokens[next][0].token]; + rate0 += (*token_costs)[!x][pt][tokens[next][0].token]; } if (t1 != EOB_TOKEN) { pt = trellis_get_coeff_context(scan, nb, i, t1, token_cache); - rate1 += token_costs[band][!x][pt][tokens[next][1].token]; + rate1 += (*token_costs)[!x][pt][tokens[next][1].token]; } } @@ -252,34 +276,38 @@ int vp9_optimize_b(MACROBLOCK *mb, int plane, int block, TX_SIZE tx_size, /* There's no choice to make for a zero coefficient, so we don't * add a new trellis node, but we do need to update the costs. */ - band = band_translate[i + 1]; pt = get_coef_context(nb, token_cache, i + 1); t0 = tokens[next][0].token; t1 = tokens[next][1].token; /* Update the cost of each path if we're past the EOB token. */ if (t0 != EOB_TOKEN) { - tokens[next][0].rate += token_costs[band][1][pt][t0]; + tokens[next][0].rate += (*token_costs)[1][pt][t0]; tokens[next][0].token = ZERO_TOKEN; } if (t1 != EOB_TOKEN) { - tokens[next][1].rate += token_costs[band][1][pt][t1]; + tokens[next][1].rate += (*token_costs)[1][pt][t1]; tokens[next][1].token = ZERO_TOKEN; } tokens[i][0].best_index = tokens[i][1].best_index = 0; /* Don't update next, because we didn't add a new node. */ } + + if (!(--band_left)) { + --band_counts; + band_left = *band_counts; + --token_costs; + } } /* Now pick the best path through the whole trellis. */ - band = band_translate[i + 1]; rate0 = tokens[next][0].rate; rate1 = tokens[next][1].rate; error0 = tokens[next][0].error; error1 = tokens[next][1].error; t0 = tokens[next][0].token; t1 = tokens[next][1].token; - rate0 += token_costs[band][0][ctx][t0]; - rate1 += token_costs[band][0][ctx][t1]; + rate0 += (*token_costs)[0][ctx][t0]; + rate1 += (*token_costs)[0][ctx][t1]; UPDATE_RD_COST(); best = rd_cost1 < rd_cost0; final_eob = -1; diff --git a/vp9/encoder/vp9_tokenize.h b/vp9/encoder/vp9_tokenize.h index 4b8d7ad5c..b2f63ffef 100644 --- a/vp9/encoder/vp9_tokenize.h +++ b/vp9/encoder/vp9_tokenize.h @@ -80,23 +80,6 @@ extern const uint16_t vp9_cat6_high_cost[64]; extern const uint16_t vp9_cat6_high10_high_cost[256]; extern const uint16_t vp9_cat6_high12_high_cost[1024]; -static INLINE void vp9_get_token_extracost(const uint16_t *cat6_high_table, - int v, int16_t *token, - int *extracost) { - EXTRABIT extrabits; // unsigned extrabits - v = abs(v); - if (v >= CAT6_MIN_VAL) { - *token = CATEGORY6_TOKEN; - extrabits = v - CAT6_MIN_VAL; - *extracost = - vp9_cat6_low_cost[extrabits & 0xff] + cat6_high_table[extrabits >> 8]; - } else { - *token = vp9_dct_cat_lt_10_value_tokens[v].token; - extrabits = vp9_dct_cat_lt_10_value_tokens[v].extra >> 1; - *extracost = vp9_extra_bits[*token].cost[extrabits]; - } -} - #if CONFIG_VP9_HIGHBITDEPTH static INLINE const uint16_t *vp9_get_high_cost_table(int bit_depth) { return bit_depth == 8 ? vp9_cat6_high_cost