Port "cost_coeff speed improvements" to vp9.
About a 5% faster overall encode (perf cycles) at speed zero! Change-Id: Iaf013ba75884415cd824e98349f654ffb1c3ef33
This commit is contained in:
parent
1ff2935ebf
commit
74a679de6f
@ -387,15 +387,14 @@ static int cost_coeffs(MACROBLOCK *x,
|
|||||||
cost = token_costs[0][0][pt][EOB_TOKEN];
|
cost = token_costs[0][0][pt][EOB_TOKEN];
|
||||||
c = 0;
|
c = 0;
|
||||||
} else {
|
} else {
|
||||||
|
if (use_fast_coef_costing) {
|
||||||
int band_left = *band_count++;
|
int band_left = *band_count++;
|
||||||
|
|
||||||
// dc token
|
// dc token
|
||||||
int v = qcoeff[0];
|
int v = qcoeff[0];
|
||||||
int16_t prev_t;
|
int16_t prev_t;
|
||||||
EXTRABIT e;
|
cost = vp9_get_token_cost(v, &prev_t, cat6_high_cost);
|
||||||
vp9_get_token_extra(v, &prev_t, &e);
|
cost += (*token_costs)[0][pt][prev_t];
|
||||||
cost = (*token_costs)[0][pt][prev_t] +
|
|
||||||
vp9_get_cost(prev_t, e, cat6_high_cost);
|
|
||||||
|
|
||||||
token_cache[0] = vp9_pt_energy_class[prev_t];
|
token_cache[0] = vp9_pt_energy_class[prev_t];
|
||||||
++token_costs;
|
++token_costs;
|
||||||
@ -406,16 +405,8 @@ static int cost_coeffs(MACROBLOCK *x,
|
|||||||
int16_t t;
|
int16_t t;
|
||||||
|
|
||||||
v = qcoeff[rc];
|
v = qcoeff[rc];
|
||||||
vp9_get_token_extra(v, &t, &e);
|
cost += vp9_get_token_cost(v, &t, cat6_high_cost);
|
||||||
if (use_fast_coef_costing) {
|
cost += (*token_costs)[!prev_t][!prev_t][t];
|
||||||
cost += (*token_costs)[!prev_t][!prev_t][t] +
|
|
||||||
vp9_get_cost(t, e, cat6_high_cost);
|
|
||||||
} else {
|
|
||||||
pt = get_coef_context(nb, token_cache, c);
|
|
||||||
cost += (*token_costs)[!prev_t][pt][t] +
|
|
||||||
vp9_get_cost(t, e, cat6_high_cost);
|
|
||||||
token_cache[rc] = vp9_pt_energy_class[t];
|
|
||||||
}
|
|
||||||
prev_t = t;
|
prev_t = t;
|
||||||
if (!--band_left) {
|
if (!--band_left) {
|
||||||
band_left = *band_count++;
|
band_left = *band_count++;
|
||||||
@ -424,10 +415,42 @@ static int cost_coeffs(MACROBLOCK *x,
|
|||||||
}
|
}
|
||||||
|
|
||||||
// eob token
|
// eob token
|
||||||
if (band_left) {
|
if (band_left)
|
||||||
if (use_fast_coef_costing) {
|
|
||||||
cost += (*token_costs)[0][!prev_t][EOB_TOKEN];
|
cost += (*token_costs)[0][!prev_t][EOB_TOKEN];
|
||||||
} else {
|
|
||||||
|
} else { // !use_fast_coef_costing
|
||||||
|
int band_left = *band_count++;
|
||||||
|
|
||||||
|
// dc token
|
||||||
|
int v = qcoeff[0];
|
||||||
|
int16_t tok;
|
||||||
|
unsigned int (*tok_cost_ptr)[COEFF_CONTEXTS][ENTROPY_TOKENS];
|
||||||
|
cost = vp9_get_token_cost(v, &tok, cat6_high_cost);
|
||||||
|
cost += (*token_costs)[0][pt][tok];
|
||||||
|
|
||||||
|
token_cache[0] = vp9_pt_energy_class[tok];
|
||||||
|
++token_costs;
|
||||||
|
|
||||||
|
tok_cost_ptr = &((*token_costs)[!tok]);
|
||||||
|
|
||||||
|
// ac tokens
|
||||||
|
for (c = 1; c < eob; c++) {
|
||||||
|
const int rc = scan[c];
|
||||||
|
|
||||||
|
v = qcoeff[rc];
|
||||||
|
cost += vp9_get_token_cost(v, &tok, cat6_high_cost);
|
||||||
|
pt = get_coef_context(nb, token_cache, c);
|
||||||
|
cost += (*tok_cost_ptr)[pt][tok];
|
||||||
|
token_cache[rc] = vp9_pt_energy_class[tok];
|
||||||
|
if (!--band_left) {
|
||||||
|
band_left = *band_count++;
|
||||||
|
++token_costs;
|
||||||
|
}
|
||||||
|
tok_cost_ptr = &((*token_costs)[!tok]);
|
||||||
|
}
|
||||||
|
|
||||||
|
// eob token
|
||||||
|
if (band_left) {
|
||||||
pt = get_coef_context(nb, token_cache, c);
|
pt = get_coef_context(nb, token_cache, c);
|
||||||
cost += (*token_costs)[0][pt][EOB_TOKEN];
|
cost += (*token_costs)[0][pt][EOB_TOKEN];
|
||||||
}
|
}
|
||||||
|
@ -50,6 +50,35 @@ static const TOKENVALUE dct_cat_lt_10_value_tokens[] = {
|
|||||||
const TOKENVALUE *vp9_dct_cat_lt_10_value_tokens = dct_cat_lt_10_value_tokens +
|
const TOKENVALUE *vp9_dct_cat_lt_10_value_tokens = dct_cat_lt_10_value_tokens +
|
||||||
(sizeof(dct_cat_lt_10_value_tokens) / sizeof(*dct_cat_lt_10_value_tokens))
|
(sizeof(dct_cat_lt_10_value_tokens) / sizeof(*dct_cat_lt_10_value_tokens))
|
||||||
/ 2;
|
/ 2;
|
||||||
|
// The corresponding costs of the extrabits for the tokens in the above table
|
||||||
|
// are stored in the table below. The values are obtained from looking up the
|
||||||
|
// entry for the specified extrabits in the table corresponding to the token
|
||||||
|
// (as defined in cost element vp9_extra_bits)
|
||||||
|
// e.g. {9, 63} maps to cat5_cost[63 >> 1], {1, 1} maps to sign_cost[1 >> 1]
|
||||||
|
// Layout note: the initializer below holds 133 entries and is mirror-symmetric
// around the single 0 at index 66, i.e. entry (66 - k) equals entry (66 + k),
// so the stored cost depends only on the magnitude of the looked-up value.
static const int dct_cat_lt_10_value_cost[] = {
|
||||||
|
3773, 3750, 3704, 3681, 3623, 3600, 3554, 3531,
|
||||||
|
3432, 3409, 3363, 3340, 3282, 3259, 3213, 3190,
|
||||||
|
3136, 3113, 3067, 3044, 2986, 2963, 2917, 2894,
|
||||||
|
2795, 2772, 2726, 2703, 2645, 2622, 2576, 2553,
|
||||||
|
3197, 3116, 3058, 2977, 2881, 2800,
|
||||||
|
2742, 2661, 2615, 2534, 2476, 2395,
|
||||||
|
2299, 2218, 2160, 2079,
|
||||||
|
2566, 2427, 2334, 2195, 2023, 1884, 1791, 1652,
|
||||||
|
1893, 1696, 1453, 1256, 1229, 864,
|
||||||
|
// Center of the table: the lone 0 is the entry for a value of 0.
512, 512, 512, 512, 0,
|
||||||
|
512, 512, 512, 512,
|
||||||
|
864, 1229, 1256, 1453, 1696, 1893,
|
||||||
|
1652, 1791, 1884, 2023, 2195, 2334, 2427, 2566,
|
||||||
|
2079, 2160, 2218, 2299, 2395, 2476, 2534, 2615,
|
||||||
|
2661, 2742, 2800, 2881, 2977, 3058, 3116, 3197,
|
||||||
|
2553, 2576, 2622, 2645, 2703, 2726, 2772, 2795,
|
||||||
|
2894, 2917, 2963, 2986, 3044, 3067, 3113, 3136,
|
||||||
|
3190, 3213, 3259, 3282, 3340, 3363, 3409, 3432,
|
||||||
|
3531, 3554, 3600, 3623, 3681, 3704, 3750, 3773,
|
||||||
|
};
|
||||||
|
// Exported view of dct_cat_lt_10_value_cost, offset by half the element count
// so that it points at the table's middle entry. This lets callers index it
// directly with a signed value (negative indices reach the first half of the
// table), the same way vp9_dct_cat_lt_10_value_tokens is set up.
const int *vp9_dct_cat_lt_10_value_cost = dct_cat_lt_10_value_cost +
|
||||||
|
(sizeof(dct_cat_lt_10_value_cost) / sizeof(*dct_cat_lt_10_value_cost))
|
||||||
|
/ 2;
|
||||||
|
|
||||||
// Array indices are identical to previously-existing CONTEXT_NODE indices
|
// Array indices are identical to previously-existing CONTEXT_NODE indices
|
||||||
const vpx_tree_index vp9_coef_tree[TREE_SIZE(ENTROPY_TOKENS)] = {
|
const vpx_tree_index vp9_coef_tree[TREE_SIZE(ENTROPY_TOKENS)] = {
|
||||||
|
@ -74,6 +74,7 @@ extern const int16_t *vp9_dct_value_cost_ptr;
|
|||||||
*/
|
*/
|
||||||
extern const TOKENVALUE *vp9_dct_value_tokens_ptr;
|
extern const TOKENVALUE *vp9_dct_value_tokens_ptr;
|
||||||
extern const TOKENVALUE *vp9_dct_cat_lt_10_value_tokens;
|
extern const TOKENVALUE *vp9_dct_cat_lt_10_value_tokens;
|
||||||
|
// Extra-bits cost lookup for values below the category-6 range; the pointer is
// defined at the middle of its backing table, so it is indexed with a signed
// value (see vp9_get_token_cost()).
extern const int *vp9_dct_cat_lt_10_value_cost;
|
||||||
extern const int16_t vp9_cat6_low_cost[256];
|
extern const int16_t vp9_cat6_low_cost[256];
|
||||||
extern const int vp9_cat6_high_cost[64];
|
extern const int vp9_cat6_high_cost[64];
|
||||||
extern const int vp9_cat6_high10_high_cost[256];
|
extern const int vp9_cat6_high10_high_cost[256];
|
||||||
@ -117,6 +118,18 @@ static INLINE int16_t vp9_get_token(int v) {
|
|||||||
return vp9_dct_cat_lt_10_value_tokens[v].token;
|
return vp9_dct_cat_lt_10_value_tokens[v].token;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Looks up the token for value v and returns the table cost associated with
// that value in a single call.
//
//   v               - signed value to classify (callers in cost_coeffs pass a
//                     quantized coefficient).
//   token           - out-parameter; always written with the token for v.
//   cat6_high_table - cost table indexed by the high part (extrabits >> 8) of
//                     the category-6 extra bits; only read on the CAT6 path.
//
// Returns the cost read from the lookup tables for v. The per-context token
// cost is NOT included; callers add it separately from token_costs.
static INLINE int vp9_get_token_cost(int v, int16_t *token,
|
||||||
|
const int *cat6_high_table) {
|
||||||
|
// Large magnitudes (either sign) all map to CATEGORY6_TOKEN.
if (v >= CAT6_MIN_VAL || v <= -CAT6_MIN_VAL) {
|
||||||
|
EXTRABIT extrabits;
|
||||||
|
*token = CATEGORY6_TOKEN;
|
||||||
|
// Offset of |v| above the smallest category-6 magnitude.
extrabits = abs(v) - CAT6_MIN_VAL;
|
||||||
|
// Cost is split across two tables: low 8 bits of the offset index
// vp9_cat6_low_cost, the remaining high bits index cat6_high_table.
return vp9_cat6_low_cost[extrabits & 0xff] +
|
||||||
|
cat6_high_table[extrabits >> 8];
|
||||||
|
}
|
||||||
|
// Here -CAT6_MIN_VAL < v < CAT6_MIN_VAL, so v may be negative; both lookup
// pointers are defined at the middle of their backing arrays, which is what
// makes the signed index valid.
*token = vp9_dct_cat_lt_10_value_tokens[v].token;
|
||||||
|
return vp9_dct_cat_lt_10_value_cost[v];
|
||||||
|
}
|
||||||
|
|
||||||
#ifdef __cplusplus
|
#ifdef __cplusplus
|
||||||
} // extern "C"
|
} // extern "C"
|
||||||
|
Loading…
Reference in New Issue
Block a user