From 74a679de6f66b6a30d7399ae427ce2b85a967824 Mon Sep 17 00:00:00 2001 From: Julia Robson Date: Fri, 26 Feb 2016 14:42:25 -0800 Subject: [PATCH] Port "cost_coeff speed improvements" to vp9. About a 5% faster overall encode (perf cycles) at speed zero! Change-Id: Iaf013ba75884415cd824e98349f654ffb1c3ef33 --- vp9/encoder/vp9_rdopt.c | 91 ++++++++++++++++++++++++-------------- vp9/encoder/vp9_tokenize.c | 29 ++++++++++++ vp9/encoder/vp9_tokenize.h | 13 ++++++ 3 files changed, 99 insertions(+), 34 deletions(-) diff --git a/vp9/encoder/vp9_rdopt.c b/vp9/encoder/vp9_rdopt.c index 193c9d33c..508c59663 100644 --- a/vp9/encoder/vp9_rdopt.c +++ b/vp9/encoder/vp9_rdopt.c @@ -387,47 +387,70 @@ static int cost_coeffs(MACROBLOCK *x, cost = token_costs[0][0][pt][EOB_TOKEN]; c = 0; } else { - int band_left = *band_count++; + if (use_fast_coef_costing) { + int band_left = *band_count++; - // dc token - int v = qcoeff[0]; - int16_t prev_t; - EXTRABIT e; - vp9_get_token_extra(v, &prev_t, &e); - cost = (*token_costs)[0][pt][prev_t] + - vp9_get_cost(prev_t, e, cat6_high_cost); + // dc token + int v = qcoeff[0]; + int16_t prev_t; + cost = vp9_get_token_cost(v, &prev_t, cat6_high_cost); + cost += (*token_costs)[0][pt][prev_t]; - token_cache[0] = vp9_pt_energy_class[prev_t]; - ++token_costs; + token_cache[0] = vp9_pt_energy_class[prev_t]; + ++token_costs; - // ac tokens - for (c = 1; c < eob; c++) { - const int rc = scan[c]; - int16_t t; + // ac tokens + for (c = 1; c < eob; c++) { + const int rc = scan[c]; + int16_t t; - v = qcoeff[rc]; - vp9_get_token_extra(v, &t, &e); - if (use_fast_coef_costing) { - cost += (*token_costs)[!prev_t][!prev_t][t] + - vp9_get_cost(t, e, cat6_high_cost); - } else { - pt = get_coef_context(nb, token_cache, c); - cost += (*token_costs)[!prev_t][pt][t] + - vp9_get_cost(t, e, cat6_high_cost); - token_cache[rc] = vp9_pt_energy_class[t]; + v = qcoeff[rc]; + cost += vp9_get_token_cost(v, &t, cat6_high_cost); + cost += (*token_costs)[!prev_t][!prev_t][t]; + prev_t = t; + if (!--band_left) { + band_left = *band_count++; + ++token_costs; + } } - prev_t = t; - if (!--band_left) { - band_left = *band_count++; - ++token_costs; - } - } - // eob token - if (band_left) { - if (use_fast_coef_costing) { + // eob token + if (band_left) cost += (*token_costs)[0][!prev_t][EOB_TOKEN]; - } else { + + } else { // !use_fast_coef_costing + int band_left = *band_count++; + + // dc token + int v = qcoeff[0]; + int16_t tok; + unsigned int (*tok_cost_ptr)[COEFF_CONTEXTS][ENTROPY_TOKENS]; + cost = vp9_get_token_cost(v, &tok, cat6_high_cost); + cost += (*token_costs)[0][pt][tok]; + + token_cache[0] = vp9_pt_energy_class[tok]; + ++token_costs; + + tok_cost_ptr = &((*token_costs)[!tok]); + + // ac tokens + for (c = 1; c < eob; c++) { + const int rc = scan[c]; + + v = qcoeff[rc]; + cost += vp9_get_token_cost(v, &tok, cat6_high_cost); + pt = get_coef_context(nb, token_cache, c); + cost += (*tok_cost_ptr)[pt][tok]; + token_cache[rc] = vp9_pt_energy_class[tok]; + if (!--band_left) { + band_left = *band_count++; + ++token_costs; + } + tok_cost_ptr = &((*token_costs)[!tok]); + } + + // eob token + if (band_left) { pt = get_coef_context(nb, token_cache, c); cost += (*token_costs)[0][pt][EOB_TOKEN]; } diff --git a/vp9/encoder/vp9_tokenize.c b/vp9/encoder/vp9_tokenize.c index 93be6d7ae..ee1d08adc 100644 --- a/vp9/encoder/vp9_tokenize.c +++ b/vp9/encoder/vp9_tokenize.c @@ -50,6 +50,35 @@ static const TOKENVALUE dct_cat_lt_10_value_tokens[] = { const TOKENVALUE *vp9_dct_cat_lt_10_value_tokens = dct_cat_lt_10_value_tokens + (sizeof(dct_cat_lt_10_value_tokens) / sizeof(*dct_cat_lt_10_value_tokens)) / 2; +// The corresponding costs of the extrabits for the tokens in the above table +// are stored in the table below. The values are obtained from looking up the +// entry for the specified extrabits in the table corresponding to the token +// (as defined in cost element vp9_extra_bits) +// e.g. {9, 63} maps to cat5_cost[63 >> 1], {1, 1} maps to sign_cost[1 >> 1] +static const int dct_cat_lt_10_value_cost[] = { + 3773, 3750, 3704, 3681, 3623, 3600, 3554, 3531, + 3432, 3409, 3363, 3340, 3282, 3259, 3213, 3190, + 3136, 3113, 3067, 3044, 2986, 2963, 2917, 2894, + 2795, 2772, 2726, 2703, 2645, 2622, 2576, 2553, + 3197, 3116, 3058, 2977, 2881, 2800, + 2742, 2661, 2615, 2534, 2476, 2395, + 2299, 2218, 2160, 2079, + 2566, 2427, 2334, 2195, 2023, 1884, 1791, 1652, + 1893, 1696, 1453, 1256, 1229, 864, + 512, 512, 512, 512, 0, + 512, 512, 512, 512, + 864, 1229, 1256, 1453, 1696, 1893, + 1652, 1791, 1884, 2023, 2195, 2334, 2427, 2566, + 2079, 2160, 2218, 2299, 2395, 2476, 2534, 2615, + 2661, 2742, 2800, 2881, 2977, 3058, 3116, 3197, + 2553, 2576, 2622, 2645, 2703, 2726, 2772, 2795, + 2894, 2917, 2963, 2986, 3044, 3067, 3113, 3136, + 3190, 3213, 3259, 3282, 3340, 3363, 3409, 3432, + 3531, 3554, 3600, 3623, 3681, 3704, 3750, 3773, +}; +const int *vp9_dct_cat_lt_10_value_cost = dct_cat_lt_10_value_cost + + (sizeof(dct_cat_lt_10_value_cost) / sizeof(*dct_cat_lt_10_value_cost)) + / 2; // Array indices are identical to previously-existing CONTEXT_NODE indices const vpx_tree_index vp9_coef_tree[TREE_SIZE(ENTROPY_TOKENS)] = { diff --git a/vp9/encoder/vp9_tokenize.h b/vp9/encoder/vp9_tokenize.h index df979b25d..fad798886 100644 --- a/vp9/encoder/vp9_tokenize.h +++ b/vp9/encoder/vp9_tokenize.h @@ -74,6 +74,7 @@ extern const int16_t *vp9_dct_value_cost_ptr; */ extern const TOKENVALUE *vp9_dct_value_tokens_ptr; extern const TOKENVALUE *vp9_dct_cat_lt_10_value_tokens; +extern const int *vp9_dct_cat_lt_10_value_cost; extern const int16_t vp9_cat6_low_cost[256]; extern const int vp9_cat6_high_cost[64]; extern const int vp9_cat6_high10_high_cost[256]; @@ -117,6 +118,18 @@ static INLINE int16_t vp9_get_token(int v) { return vp9_dct_cat_lt_10_value_tokens[v].token; } +static INLINE int vp9_get_token_cost(int v, int16_t *token, + const int *cat6_high_table) { + if (v >= CAT6_MIN_VAL || v <= -CAT6_MIN_VAL) { + EXTRABIT extrabits; + *token = CATEGORY6_TOKEN; + extrabits = abs(v) - CAT6_MIN_VAL; + return vp9_cat6_low_cost[extrabits & 0xff] + + cat6_high_table[extrabits >> 8]; + } + *token = vp9_dct_cat_lt_10_value_tokens[v].token; + return vp9_dct_cat_lt_10_value_cost[v]; +} #ifdef __cplusplus } // extern "C"