Merge changes Ie989e60c,Ifc110b12

* changes:
  Backport "Optimize the use case of token_cost table" to VP9
  Drop vp9_get_token_extracost
This commit is contained in:
Alex Converse
2017-03-23 18:05:13 +00:00
committed by Gerrit Code Review
2 changed files with 47 additions and 36 deletions

View File

@@ -80,6 +80,18 @@ static int trellis_get_coeff_context(const int16_t *scan, const int16_t *nb,
return pt; return pt;
} }
/* Per-band position counts, one row per transform size; vp9_optimize_b
 * indexes this as band_count_table[tx_size][band].
 *
 * Each row lists six band sizes followed by an explicit 0. Only seven of
 * the eight elements are written out, so the last one is zero-initialized.
 * The sixth entry is spelled as "total - positions already assigned to the
 * first five bands" (e.g. 16 - 13 where 1 + 2 + 3 + 4 + 3 == 13).
 *
 * NOTE(review): the totals 16/64/256/1024 presumably are the coefficient
 * counts of the 4x4, 8x8, 16x16 and 32x32 transforms -- confirm against the
 * TX_SIZE enum.
 */
static const int16_t band_count_table[TX_SIZES][8] = {
{ 1, 2, 3, 4, 3, 16 - 13, 0 },
{ 1, 2, 3, 4, 11, 64 - 21, 0 },
{ 1, 2, 3, 4, 11, 256 - 21, 0 },
{ 1, 2, 3, 4, 11, 1024 - 21, 0 },
};
/* Running totals of the band sizes in band_count_table, one row per
 * transform size: entry k is the sum of the first k band sizes of the
 * matching row (0, 1, 1+2, 1+2+3, ...), so entry 6 equals the row total.
 * The eighth entry is a 0 pad.
 *
 * vp9_optimize_b reads band_cum_count_table[tx_size][band] to work out how
 * many positions remain in the starting band (eob - cumulative + 1).
 */
static const int16_t band_cum_count_table[TX_SIZES][8] = {
{ 0, 1, 3, 6, 10, 13, 16, 0 },
{ 0, 1, 3, 6, 10, 21, 64, 0 },
{ 0, 1, 3, 6, 10, 21, 256, 0 },
{ 0, 1, 3, 6, 10, 21, 1024, 0 },
};
int vp9_optimize_b(MACROBLOCK *mb, int plane, int block, TX_SIZE tx_size, int vp9_optimize_b(MACROBLOCK *mb, int plane, int block, TX_SIZE tx_size,
int ctx) { int ctx) {
MACROBLOCKD *const xd = &mb->e_mbd; MACROBLOCKD *const xd = &mb->e_mbd;
@@ -108,14 +120,20 @@ int vp9_optimize_b(MACROBLOCK *mb, int plane, int block, TX_SIZE tx_size,
int rate0, rate1; int rate0, rate1;
int64_t error0, error1; int64_t error0, error1;
int16_t t0, t1; int16_t t0, t1;
unsigned int(*const token_costs)[2][COEFF_CONTEXTS][ENTROPY_TOKENS] = int best, band = (eob < default_eob) ? band_translate[eob]
mb->token_costs[tx_size][type][ref]; : band_translate[eob - 1];
int best, band, pt, i, final_eob; int pt, i, final_eob;
#if CONFIG_VP9_HIGHBITDEPTH #if CONFIG_VP9_HIGHBITDEPTH
const uint16_t *cat6_high_cost = vp9_get_high_cost_table(xd->bd); const uint16_t *cat6_high_cost = vp9_get_high_cost_table(xd->bd);
#else #else
const uint16_t *cat6_high_cost = vp9_get_high_cost_table(8); const uint16_t *cat6_high_cost = vp9_get_high_cost_table(8);
#endif #endif
unsigned int(*token_costs)[2][COEFF_CONTEXTS][ENTROPY_TOKENS] =
mb->token_costs[tx_size][type][ref];
const int16_t *band_counts = &band_count_table[tx_size][band];
int16_t band_left = eob - band_cum_count_table[tx_size][band] + 1;
token_costs += band;
assert((!type && !plane) || (type && plane)); assert((!type && !plane) || (type && plane));
assert(eob <= default_eob); assert(eob <= default_eob);
@@ -129,8 +147,10 @@ int vp9_optimize_b(MACROBLOCK *mb, int plane, int block, TX_SIZE tx_size,
tokens[eob][0].qc = 0; tokens[eob][0].qc = 0;
tokens[eob][1] = tokens[eob][0]; tokens[eob][1] = tokens[eob][0];
for (i = 0; i < eob; i++) for (i = 0; i < eob; i++) {
token_cache[scan[i]] = vp9_pt_energy_class[vp9_get_token(qcoeff[scan[i]])]; const int rc = scan[i];
token_cache[rc] = vp9_pt_energy_class[vp9_get_token(qcoeff[rc])];
}
for (i = eob; i-- > 0;) { for (i = eob; i-- > 0;) {
int base_bits, d2, dx; int base_bits, d2, dx;
@@ -143,13 +163,12 @@ int vp9_optimize_b(MACROBLOCK *mb, int plane, int block, TX_SIZE tx_size,
/* Evaluate the first possibility for this state. */ /* Evaluate the first possibility for this state. */
rate0 = tokens[next][0].rate; rate0 = tokens[next][0].rate;
rate1 = tokens[next][1].rate; rate1 = tokens[next][1].rate;
vp9_get_token_extracost(cat6_high_cost, x, &t0, &base_bits); base_bits = vp9_get_token_cost(x, &t0, cat6_high_cost);
/* Consider both possible successor states. */ /* Consider both possible successor states. */
if (next < default_eob) { if (next < default_eob) {
band = band_translate[i + 1];
pt = trellis_get_coeff_context(scan, nb, i, t0, token_cache); pt = trellis_get_coeff_context(scan, nb, i, t0, token_cache);
rate0 += token_costs[band][0][pt][tokens[next][0].token]; rate0 += (*token_costs)[0][pt][tokens[next][0].token];
rate1 += token_costs[band][0][pt][tokens[next][1].token]; rate1 += (*token_costs)[0][pt][tokens[next][1].token];
} }
UPDATE_RD_COST(); UPDATE_RD_COST();
/* And pick the best. */ /* And pick the best. */
@@ -181,6 +200,12 @@ int vp9_optimize_b(MACROBLOCK *mb, int plane, int block, TX_SIZE tx_size,
} else { } else {
tokens[i][1] = tokens[i][0]; tokens[i][1] = tokens[i][0];
next = i; next = i;
if (!(--band_left)) {
--band_counts;
band_left = *band_counts;
--token_costs;
}
continue; continue;
} }
@@ -193,18 +218,17 @@ int vp9_optimize_b(MACROBLOCK *mb, int plane, int block, TX_SIZE tx_size,
t1 = tokens[next][1].token == EOB_TOKEN ? EOB_TOKEN : ZERO_TOKEN; t1 = tokens[next][1].token == EOB_TOKEN ? EOB_TOKEN : ZERO_TOKEN;
base_bits = 0; base_bits = 0;
} else { } else {
vp9_get_token_extracost(cat6_high_cost, x, &t0, &base_bits); base_bits = vp9_get_token_cost(x, &t0, cat6_high_cost);
t1 = t0; t1 = t0;
} }
if (next < default_eob) { if (next < default_eob) {
band = band_translate[i + 1];
if (t0 != EOB_TOKEN) { if (t0 != EOB_TOKEN) {
pt = trellis_get_coeff_context(scan, nb, i, t0, token_cache); pt = trellis_get_coeff_context(scan, nb, i, t0, token_cache);
rate0 += token_costs[band][!x][pt][tokens[next][0].token]; rate0 += (*token_costs)[!x][pt][tokens[next][0].token];
} }
if (t1 != EOB_TOKEN) { if (t1 != EOB_TOKEN) {
pt = trellis_get_coeff_context(scan, nb, i, t1, token_cache); pt = trellis_get_coeff_context(scan, nb, i, t1, token_cache);
rate1 += token_costs[band][!x][pt][tokens[next][1].token]; rate1 += (*token_costs)[!x][pt][tokens[next][1].token];
} }
} }
@@ -252,34 +276,38 @@ int vp9_optimize_b(MACROBLOCK *mb, int plane, int block, TX_SIZE tx_size,
/* There's no choice to make for a zero coefficient, so we don't /* There's no choice to make for a zero coefficient, so we don't
* add a new trellis node, but we do need to update the costs. * add a new trellis node, but we do need to update the costs.
*/ */
band = band_translate[i + 1];
pt = get_coef_context(nb, token_cache, i + 1); pt = get_coef_context(nb, token_cache, i + 1);
t0 = tokens[next][0].token; t0 = tokens[next][0].token;
t1 = tokens[next][1].token; t1 = tokens[next][1].token;
/* Update the cost of each path if we're past the EOB token. */ /* Update the cost of each path if we're past the EOB token. */
if (t0 != EOB_TOKEN) { if (t0 != EOB_TOKEN) {
tokens[next][0].rate += token_costs[band][1][pt][t0]; tokens[next][0].rate += (*token_costs)[1][pt][t0];
tokens[next][0].token = ZERO_TOKEN; tokens[next][0].token = ZERO_TOKEN;
} }
if (t1 != EOB_TOKEN) { if (t1 != EOB_TOKEN) {
tokens[next][1].rate += token_costs[band][1][pt][t1]; tokens[next][1].rate += (*token_costs)[1][pt][t1];
tokens[next][1].token = ZERO_TOKEN; tokens[next][1].token = ZERO_TOKEN;
} }
tokens[i][0].best_index = tokens[i][1].best_index = 0; tokens[i][0].best_index = tokens[i][1].best_index = 0;
/* Don't update next, because we didn't add a new node. */ /* Don't update next, because we didn't add a new node. */
} }
if (!(--band_left)) {
--band_counts;
band_left = *band_counts;
--token_costs;
}
} }
/* Now pick the best path through the whole trellis. */ /* Now pick the best path through the whole trellis. */
band = band_translate[i + 1];
rate0 = tokens[next][0].rate; rate0 = tokens[next][0].rate;
rate1 = tokens[next][1].rate; rate1 = tokens[next][1].rate;
error0 = tokens[next][0].error; error0 = tokens[next][0].error;
error1 = tokens[next][1].error; error1 = tokens[next][1].error;
t0 = tokens[next][0].token; t0 = tokens[next][0].token;
t1 = tokens[next][1].token; t1 = tokens[next][1].token;
rate0 += token_costs[band][0][ctx][t0]; rate0 += (*token_costs)[0][ctx][t0];
rate1 += token_costs[band][0][ctx][t1]; rate1 += (*token_costs)[0][ctx][t1];
UPDATE_RD_COST(); UPDATE_RD_COST();
best = rd_cost1 < rd_cost0; best = rd_cost1 < rd_cost0;
final_eob = -1; final_eob = -1;

View File

@@ -80,23 +80,6 @@ extern const uint16_t vp9_cat6_high_cost[64];
extern const uint16_t vp9_cat6_high10_high_cost[256]; extern const uint16_t vp9_cat6_high10_high_cost[256];
extern const uint16_t vp9_cat6_high12_high_cost[1024]; extern const uint16_t vp9_cat6_high12_high_cost[1024];
static INLINE void vp9_get_token_extracost(const uint16_t *cat6_high_table,
int v, int16_t *token,
int *extracost) {
EXTRABIT extrabits; // unsigned extrabits
v = abs(v);
if (v >= CAT6_MIN_VAL) {
*token = CATEGORY6_TOKEN;
extrabits = v - CAT6_MIN_VAL;
*extracost =
vp9_cat6_low_cost[extrabits & 0xff] + cat6_high_table[extrabits >> 8];
} else {
*token = vp9_dct_cat_lt_10_value_tokens[v].token;
extrabits = vp9_dct_cat_lt_10_value_tokens[v].extra >> 1;
*extracost = vp9_extra_bits[*token].cost[extrabits];
}
}
#if CONFIG_VP9_HIGHBITDEPTH #if CONFIG_VP9_HIGHBITDEPTH
static INLINE const uint16_t *vp9_get_high_cost_table(int bit_depth) { static INLINE const uint16_t *vp9_get_high_cost_table(int bit_depth) {
return bit_depth == 8 ? vp9_cat6_high_cost return bit_depth == 8 ? vp9_cat6_high_cost