vp9_optimize_b: Combine extrabits cost with token lookup

About 0.6% fewer cycles spent in vp9_optimize_b.

Change-Id: I2ae62a78374c594ed81d4e3100a5848e2f6f2c4e
This commit is contained in:
Alex Converse 2017-03-16 16:34:26 -07:00
parent 8440cc4817
commit 3a6ec9ea72
2 changed files with 19 additions and 12 deletions

View File

@@ -108,7 +108,6 @@ int vp9_optimize_b(MACROBLOCK *mb, int plane, int block, TX_SIZE tx_size,
int rate0, rate1; int rate0, rate1;
int64_t error0, error1; int64_t error0, error1;
int16_t t0, t1; int16_t t0, t1;
EXTRABIT e0;
unsigned int(*const token_costs)[2][COEFF_CONTEXTS][ENTROPY_TOKENS] = unsigned int(*const token_costs)[2][COEFF_CONTEXTS][ENTROPY_TOKENS] =
mb->token_costs[tx_size][type][ref]; mb->token_costs[tx_size][type][ref];
int best, band, pt, i, final_eob; int best, band, pt, i, final_eob;
@@ -144,7 +143,7 @@ int vp9_optimize_b(MACROBLOCK *mb, int plane, int block, TX_SIZE tx_size,
/* Evaluate the first possibility for this state. */ /* Evaluate the first possibility for this state. */
rate0 = tokens[next][0].rate; rate0 = tokens[next][0].rate;
rate1 = tokens[next][1].rate; rate1 = tokens[next][1].rate;
vp9_get_token_extra(x, &t0, &e0); vp9_get_token_extracost(cat6_high_cost, x, &t0, &base_bits);
/* Consider both possible successor states. */ /* Consider both possible successor states. */
if (next < default_eob) { if (next < default_eob) {
band = band_translate[i + 1]; band = band_translate[i + 1];
@@ -155,7 +154,6 @@ int vp9_optimize_b(MACROBLOCK *mb, int plane, int block, TX_SIZE tx_size,
UPDATE_RD_COST(); UPDATE_RD_COST();
/* And pick the best. */ /* And pick the best. */
best = rd_cost1 < rd_cost0; best = rd_cost1 < rd_cost0;
base_bits = vp9_get_cost(t0, e0, cat6_high_cost);
dx = (dqcoeff[rc] - coeff[rc]) * (1 << shift); dx = (dqcoeff[rc] - coeff[rc]) * (1 << shift);
#if CONFIG_VP9_HIGHBITDEPTH #if CONFIG_VP9_HIGHBITDEPTH
if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) { if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
@@ -193,9 +191,9 @@ int vp9_optimize_b(MACROBLOCK *mb, int plane, int block, TX_SIZE tx_size,
*/ */
t0 = tokens[next][0].token == EOB_TOKEN ? EOB_TOKEN : ZERO_TOKEN; t0 = tokens[next][0].token == EOB_TOKEN ? EOB_TOKEN : ZERO_TOKEN;
t1 = tokens[next][1].token == EOB_TOKEN ? EOB_TOKEN : ZERO_TOKEN; t1 = tokens[next][1].token == EOB_TOKEN ? EOB_TOKEN : ZERO_TOKEN;
e0 = 0; base_bits = 0;
} else { } else {
vp9_get_token_extra(x, &t0, &e0); vp9_get_token_extracost(cat6_high_cost, x, &t0, &base_bits);
t1 = t0; t1 = t0;
} }
if (next < default_eob) { if (next < default_eob) {
@@ -213,7 +211,6 @@ int vp9_optimize_b(MACROBLOCK *mb, int plane, int block, TX_SIZE tx_size,
UPDATE_RD_COST(); UPDATE_RD_COST();
/* And pick the best. */ /* And pick the best. */
best = rd_cost1 < rd_cost0; best = rd_cost1 < rd_cost0;
base_bits = vp9_get_cost(t0, e0, cat6_high_cost);
#if CONFIG_VP9_HIGHBITDEPTH #if CONFIG_VP9_HIGHBITDEPTH
if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) { if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {

View File

@@ -79,12 +79,22 @@ extern const int16_t vp9_cat6_low_cost[256];
extern const uint16_t vp9_cat6_high_cost[64]; extern const uint16_t vp9_cat6_high_cost[64];
extern const uint16_t vp9_cat6_high10_high_cost[256]; extern const uint16_t vp9_cat6_high10_high_cost[256];
extern const uint16_t vp9_cat6_high12_high_cost[1024]; extern const uint16_t vp9_cat6_high12_high_cost[1024];
static INLINE int vp9_get_cost(int16_t token, EXTRABIT extrabits,
const uint16_t *cat6_high_table) { static INLINE void vp9_get_token_extracost(const uint16_t *cat6_high_table,
if (token != CATEGORY6_TOKEN) int v, int16_t *token,
return vp9_extra_bits[token].cost[extrabits >> 1]; int *extracost) {
return vp9_cat6_low_cost[(extrabits >> 1) & 0xff] + EXTRABIT extrabits; // unsigned extrabits
cat6_high_table[extrabits >> 9]; v = abs(v);
if (v >= CAT6_MIN_VAL) {
*token = CATEGORY6_TOKEN;
extrabits = v - CAT6_MIN_VAL;
*extracost =
vp9_cat6_low_cost[extrabits & 0xff] + cat6_high_table[extrabits >> 8];
} else {
*token = vp9_dct_cat_lt_10_value_tokens[v].token;
extrabits = vp9_dct_cat_lt_10_value_tokens[v].extra >> 1;
*extracost = vp9_extra_bits[*token].cost[extrabits];
}
} }
#if CONFIG_VP9_HIGHBITDEPTH #if CONFIG_VP9_HIGHBITDEPTH