Merge "Make get_coef_context() branchless."

This commit is contained in:
Ronald S. Bultje 2013-07-02 11:48:15 -07:00 committed by Gerrit Code Review
commit 3cc6eb7c00
6 changed files with 75 additions and 108 deletions

View File

@ -461,25 +461,25 @@ void vp9_default_coef_probs(VP9_COMMON *pc) {
// for each position in raster scan order.
// -1 indicates the neighbor does not exist.
//
// Neighbor tables, MAX_NEIGHBORS entries per scan position, one table per
// scan order and transform size. Each array line below is listed twice:
// first sized N * MAX_NEIGHBORS, then (N + 1) * MAX_NEIGHBORS. The larger
// size has room for the extra pair that init_scan_neighbors() writes at
// index MAX_NEIGHBORS * l2.
// NOTE(review): the paired lines look like the before/after sides of a diff
// shown together; only one of each pair can be kept for this to compile.
DECLARE_ALIGNED(16, int16_t,
vp9_default_scan_4x4_neighbors[16 * MAX_NEIGHBORS]);
vp9_default_scan_4x4_neighbors[17 * MAX_NEIGHBORS]);
DECLARE_ALIGNED(16, int16_t,
vp9_col_scan_4x4_neighbors[16 * MAX_NEIGHBORS]);
vp9_col_scan_4x4_neighbors[17 * MAX_NEIGHBORS]);
DECLARE_ALIGNED(16, int16_t,
vp9_row_scan_4x4_neighbors[16 * MAX_NEIGHBORS]);
vp9_row_scan_4x4_neighbors[17 * MAX_NEIGHBORS]);
DECLARE_ALIGNED(16, int16_t,
vp9_col_scan_8x8_neighbors[64 * MAX_NEIGHBORS]);
vp9_col_scan_8x8_neighbors[65 * MAX_NEIGHBORS]);
DECLARE_ALIGNED(16, int16_t,
vp9_row_scan_8x8_neighbors[64 * MAX_NEIGHBORS]);
vp9_row_scan_8x8_neighbors[65 * MAX_NEIGHBORS]);
DECLARE_ALIGNED(16, int16_t,
vp9_default_scan_8x8_neighbors[64 * MAX_NEIGHBORS]);
vp9_default_scan_8x8_neighbors[65 * MAX_NEIGHBORS]);
DECLARE_ALIGNED(16, int16_t,
vp9_col_scan_16x16_neighbors[256 * MAX_NEIGHBORS]);
vp9_col_scan_16x16_neighbors[257 * MAX_NEIGHBORS]);
DECLARE_ALIGNED(16, int16_t,
vp9_row_scan_16x16_neighbors[256 * MAX_NEIGHBORS]);
vp9_row_scan_16x16_neighbors[257 * MAX_NEIGHBORS]);
DECLARE_ALIGNED(16, int16_t,
vp9_default_scan_16x16_neighbors[256 * MAX_NEIGHBORS]);
vp9_default_scan_16x16_neighbors[257 * MAX_NEIGHBORS]);
DECLARE_ALIGNED(16, int16_t,
vp9_default_scan_32x32_neighbors[1024 * MAX_NEIGHBORS]);
vp9_default_scan_32x32_neighbors[1025 * MAX_NEIGHBORS]);
DECLARE_ALIGNED(16, int16_t, vp9_default_iscan_4x4[16]);
DECLARE_ALIGNED(16, int16_t, vp9_col_iscan_4x4[16]);
@ -504,15 +504,17 @@ static int find_in_scan(const int16_t *scan, int l, int idx) {
}
static void init_scan_neighbors(const int16_t *scan,
int16_t *iscan,
int l, int16_t *neighbors,
int max_neighbors) {
int l, int16_t *neighbors) {
int l2 = l * l;
int n, i, j;
for (n = 0; n < l2; n++) {
// dc doesn't use this type of prediction
neighbors[MAX_NEIGHBORS * 0 + 0] = 0;
neighbors[MAX_NEIGHBORS * 0 + 1] = 0;
iscan[0] = find_in_scan(scan, l, 0);
for (n = 1; n < l2; n++) {
int rc = scan[n];
iscan[n] = find_in_scan(scan, l, n);
assert(max_neighbors == MAX_NEIGHBORS);
i = rc / l;
j = rc % l;
if (i > 0 && j > 0) {
@ -524,93 +526,84 @@ static void init_scan_neighbors(const int16_t *scan,
// Therefore, if we use ADST/DCT, prefer the DCT neighbor coeff
// as a context. If ADST or DCT is used in both directions, we
// use the combination of the two as a context.
int a = find_in_scan(scan, l, (i - 1) * l + j);
int b = find_in_scan(scan, l, i * l + j - 1);
int a = (i - 1) * l + j;
int b = i * l + j - 1;
if (scan == vp9_col_scan_4x4 || scan == vp9_col_scan_8x8 ||
scan == vp9_col_scan_16x16) {
neighbors[max_neighbors * n + 0] = a;
neighbors[max_neighbors * n + 1] = -1;
// in the col/row scan cases (as well as left/top edge cases), we store the
// same neighbor in both slots, so the context can be computed branchlessly
// as (ctx0 + ctx1 + 1) >> 1, which is simply ctx0 when ctx0 == ctx1
neighbors[MAX_NEIGHBORS * n + 0] =
neighbors[MAX_NEIGHBORS * n + 1] = a;
} else if (scan == vp9_row_scan_4x4 || scan == vp9_row_scan_8x8 ||
scan == vp9_row_scan_16x16) {
neighbors[max_neighbors * n + 0] = b;
neighbors[max_neighbors * n + 1] = -1;
neighbors[MAX_NEIGHBORS * n + 0] =
neighbors[MAX_NEIGHBORS * n + 1] = b;
} else {
neighbors[max_neighbors * n + 0] = a;
neighbors[max_neighbors * n + 1] = b;
neighbors[MAX_NEIGHBORS * n + 0] = a;
neighbors[MAX_NEIGHBORS * n + 1] = b;
}
} else if (i > 0) {
neighbors[max_neighbors * n + 0] = find_in_scan(scan, l, (i - 1) * l + j);
neighbors[max_neighbors * n + 1] = -1;
} else if (j > 0) {
neighbors[max_neighbors * n + 0] =
find_in_scan(scan, l, i * l + j - 1);
neighbors[max_neighbors * n + 1] = -1;
neighbors[MAX_NEIGHBORS * n + 0] =
neighbors[MAX_NEIGHBORS * n + 1] = (i - 1) * l + j;
} else {
assert(n == 0);
// dc predictor doesn't use previous tokens
neighbors[max_neighbors * n + 0] = -1;
assert(j > 0);
neighbors[MAX_NEIGHBORS * n + 0] =
neighbors[MAX_NEIGHBORS * n + 1] = i * l + j - 1;
}
assert(neighbors[max_neighbors * n + 0] < n);
assert(iscan[neighbors[MAX_NEIGHBORS * n + 0]] < n);
}
// one padding item so we don't have to add branches in code to handle
// calls to get_coef_context() with c == l2, i.e. for the position one past
// the final coefficient in the scan
neighbors[MAX_NEIGHBORS * l2 + 0] = 0;
neighbors[MAX_NEIGHBORS * l2 + 1] = 0;
}
// Runs init_scan_neighbors() once for each scan table listed here: the
// default, row and col scans at 4x4, 8x8 and 16x16, plus the default scan at
// 32x32. Each call passes the scan, its matching iscan table, the block
// width, and the neighbors table to fill.
// NOTE(review): the last argument line of every call appears twice, once
// with a trailing MAX_NEIGHBORS argument and once without. This looks like
// the before/after sides of a diff shown together; keep only the variant
// that matches the init_scan_neighbors() signature in use.
void vp9_init_neighbors() {
init_scan_neighbors(vp9_default_scan_4x4, vp9_default_iscan_4x4, 4,
vp9_default_scan_4x4_neighbors, MAX_NEIGHBORS);
vp9_default_scan_4x4_neighbors);
init_scan_neighbors(vp9_row_scan_4x4, vp9_row_iscan_4x4, 4,
vp9_row_scan_4x4_neighbors, MAX_NEIGHBORS);
vp9_row_scan_4x4_neighbors);
init_scan_neighbors(vp9_col_scan_4x4, vp9_col_iscan_4x4, 4,
vp9_col_scan_4x4_neighbors, MAX_NEIGHBORS);
vp9_col_scan_4x4_neighbors);
init_scan_neighbors(vp9_default_scan_8x8, vp9_default_iscan_8x8, 8,
vp9_default_scan_8x8_neighbors, MAX_NEIGHBORS);
vp9_default_scan_8x8_neighbors);
init_scan_neighbors(vp9_row_scan_8x8, vp9_row_iscan_8x8, 8,
vp9_row_scan_8x8_neighbors, MAX_NEIGHBORS);
vp9_row_scan_8x8_neighbors);
init_scan_neighbors(vp9_col_scan_8x8, vp9_col_iscan_8x8, 8,
vp9_col_scan_8x8_neighbors, MAX_NEIGHBORS);
vp9_col_scan_8x8_neighbors);
init_scan_neighbors(vp9_default_scan_16x16, vp9_default_iscan_16x16, 16,
vp9_default_scan_16x16_neighbors, MAX_NEIGHBORS);
vp9_default_scan_16x16_neighbors);
init_scan_neighbors(vp9_row_scan_16x16, vp9_row_iscan_16x16, 16,
vp9_row_scan_16x16_neighbors, MAX_NEIGHBORS);
vp9_row_scan_16x16_neighbors);
init_scan_neighbors(vp9_col_scan_16x16, vp9_col_iscan_16x16, 16,
vp9_col_scan_16x16_neighbors, MAX_NEIGHBORS);
vp9_col_scan_16x16_neighbors);
init_scan_neighbors(vp9_default_scan_32x32, vp9_default_iscan_32x32, 32,
vp9_default_scan_32x32_neighbors, MAX_NEIGHBORS);
vp9_default_scan_32x32_neighbors);
}
// Returns the neighbors table that belongs to `scan`, chosen by comparing
// the scan pointer against each known scan table in turn.
// NOTE(review): two variants are shown interleaved, which looks like the
// before/after sides of a diff:
//  - with the `int *pad` parameter, every branch also stores a value through
//    *pad, and an unrecognised scan hits assert(0) and returns NULL;
//  - without `pad`, the final else asserts that scan is
//    vp9_default_scan_32x32 and returns the 32x32 table, so a build where
//    the assert compiles away would return that table for any unrecognised
//    pointer.
const int16_t *vp9_get_coef_neighbors_handle(const int16_t *scan, int *pad) {
const int16_t *vp9_get_coef_neighbors_handle(const int16_t *scan) {
if (scan == vp9_default_scan_4x4) {
*pad = MAX_NEIGHBORS;
return vp9_default_scan_4x4_neighbors;
} else if (scan == vp9_row_scan_4x4) {
*pad = MAX_NEIGHBORS;
return vp9_row_scan_4x4_neighbors;
} else if (scan == vp9_col_scan_4x4) {
*pad = MAX_NEIGHBORS;
return vp9_col_scan_4x4_neighbors;
} else if (scan == vp9_default_scan_8x8) {
*pad = MAX_NEIGHBORS;
return vp9_default_scan_8x8_neighbors;
} else if (scan == vp9_row_scan_8x8) {
*pad = 2;
return vp9_row_scan_8x8_neighbors;
} else if (scan == vp9_col_scan_8x8) {
*pad = 2;
return vp9_col_scan_8x8_neighbors;
} else if (scan == vp9_default_scan_16x16) {
*pad = MAX_NEIGHBORS;
return vp9_default_scan_16x16_neighbors;
} else if (scan == vp9_row_scan_16x16) {
*pad = 2;
return vp9_row_scan_16x16_neighbors;
} else if (scan == vp9_col_scan_16x16) {
*pad = 2;
return vp9_col_scan_16x16_neighbors;
} else if (scan == vp9_default_scan_32x32) {
*pad = MAX_NEIGHBORS;
return vp9_default_scan_32x32_neighbors;
} else {
assert(0);
return NULL;
assert(scan == vp9_default_scan_32x32);
return vp9_default_scan_32x32_neighbors;
}
}

View File

@ -166,28 +166,14 @@ static int get_coef_band(const uint8_t * band_translate, int coef_index) {
}
#define MAX_NEIGHBORS 2
// Returns the context for coefficient index c from the token_cache values
// of the neighbors recorded for c in `neighbors`.
// NOTE(review): two variants are shown interleaved and share one closing
// brace, which looks like the before/after sides of a diff.
//
// Variant taking (scan, neighbors, nb_pad, token_cache, c, l):
//  - returns 0 when c == l;
//  - treats neighbor entries as scan indices (token_cache is indexed through
//    scan[...]);
//  - if the second neighbor entry is >= 0, returns the rounded-up average of
//    both neighbors' cache values; otherwise returns the first neighbor's
//    cache value alone.
static INLINE int get_coef_context(const int16_t *scan,
const int16_t *neighbors,
int nb_pad, uint8_t *token_cache,
int c, int l) {
int eob = l;
assert(nb_pad == MAX_NEIGHBORS);
if (c == eob) {
return 0;
} else {
int ctx;
assert(neighbors[MAX_NEIGHBORS * c + 0] >= 0);
if (neighbors[MAX_NEIGHBORS * c + 1] >= 0) {
ctx = (1 + token_cache[scan[neighbors[MAX_NEIGHBORS * c + 0]]] +
token_cache[scan[neighbors[MAX_NEIGHBORS * c + 1]]]) >> 1;
} else {
ctx = token_cache[scan[neighbors[MAX_NEIGHBORS * c + 0]]];
}
return ctx;
}
// Variant taking (neighbors, token_cache, c):
//  - has no branches; neighbor entries index token_cache directly;
//  - always returns (1 + cache[n0] + cache[n1]) >> 1, which equals cache[n0]
//    whenever both entries name the same neighbor.
static INLINE int get_coef_context(const int16_t *neighbors,
uint8_t *token_cache,
int c) {
return (1 + token_cache[neighbors[MAX_NEIGHBORS * c + 0]] +
token_cache[neighbors[MAX_NEIGHBORS * c + 1]]) >> 1;
}
const int16_t *vp9_get_coef_neighbors_handle(const int16_t *scan, int *pad);
const int16_t *vp9_get_coef_neighbors_handle(const int16_t *scan);
// 128 lists of probabilities are stored for the following ONE node probs:

View File

@ -97,7 +97,7 @@ static int decode_coefs(FRAME_CONTEXT *fc, const MACROBLOCKD *xd,
TX_SIZE txfm_size, const int16_t *dq,
ENTROPY_CONTEXT *A, ENTROPY_CONTEXT *L) {
ENTROPY_CONTEXT above_ec, left_ec;
int pt, c = 0, pad, default_eob;
int pt, c = 0;
int band;
vp9_prob (*coef_probs)[PREV_COEF_CONTEXTS][UNCONSTRAINED_NODES];
vp9_prob coef_probs_full[COEF_BANDS][PREV_COEF_CONTEXTS][ENTROPY_NODES];
@ -130,7 +130,6 @@ static int decode_coefs(FRAME_CONTEXT *fc, const MACROBLOCKD *xd,
scan = get_scan_4x4(tx_type);
above_ec = A[0] != 0;
left_ec = L[0] != 0;
default_eob = 16;
band_translate = vp9_coefband_trans_4x4;
break;
}
@ -140,7 +139,6 @@ static int decode_coefs(FRAME_CONTEXT *fc, const MACROBLOCKD *xd,
scan = get_scan_8x8(tx_type);
above_ec = (A[0] + A[1]) != 0;
left_ec = (L[0] + L[1]) != 0;
default_eob = 64;
band_translate = vp9_coefband_trans_8x8plus;
break;
}
@ -150,7 +148,6 @@ static int decode_coefs(FRAME_CONTEXT *fc, const MACROBLOCKD *xd,
scan = get_scan_16x16(tx_type);
above_ec = (A[0] + A[1] + A[2] + A[3]) != 0;
left_ec = (L[0] + L[1] + L[2] + L[3]) != 0;
default_eob = 256;
band_translate = vp9_coefband_trans_8x8plus;
break;
}
@ -158,13 +155,12 @@ static int decode_coefs(FRAME_CONTEXT *fc, const MACROBLOCKD *xd,
scan = vp9_default_scan_32x32;
above_ec = (A[0] + A[1] + A[2] + A[3] + A[4] + A[5] + A[6] + A[7]) != 0;
left_ec = (L[0] + L[1] + L[2] + L[3] + L[4] + L[5] + L[6] + L[7]) != 0;
default_eob = 1024;
band_translate = vp9_coefband_trans_8x8plus;
break;
}
pt = combine_entropy_contexts(above_ec, left_ec);
nb = vp9_get_coef_neighbors_handle(scan, &pad);
nb = vp9_get_coef_neighbors_handle(scan);
while (1) {
int val;
@ -172,8 +168,7 @@ static int decode_coefs(FRAME_CONTEXT *fc, const MACROBLOCKD *xd,
if (c >= seg_eob)
break;
if (c)
pt = get_coef_context(scan, nb, pad, token_cache,
c, default_eob);
pt = get_coef_context(nb, token_cache, c);
band = get_coef_band(band_translate, c);
prob = coef_probs[band][pt];
#if !CONFIG_BALANCED_COEFTREE
@ -186,8 +181,7 @@ SKIP_START:
if (c >= seg_eob)
break;
if (c)
pt = get_coef_context(scan, nb, pad, token_cache,
c, default_eob);
pt = get_coef_context(nb, token_cache, c);
band = get_coef_band(band_translate, c);
prob = coef_probs[band][pt];

View File

@ -112,11 +112,10 @@ static const int plane_rd_mult[4] = {
// Returns the context get_coef_context() yields for position idx + 1 when
// the coefficient at scan position idx is assumed to be `token`.
// It saves token_cache[scan[idx]], overwrites it with
// vp9_pt_energy_class[token], queries the context, then restores the saved
// value, so token_cache is unchanged on return.
// NOTE(review): the end of the parameter list and the get_coef_context()
// call each appear in two forms (with and without pad/l), which looks like
// the before/after sides of a diff shown together.
static int trellis_get_coeff_context(const int16_t *scan,
const int16_t *nb,
int idx, int token,
uint8_t *token_cache,
int pad, int l) {
uint8_t *token_cache) {
int bak = token_cache[scan[idx]], pt;
token_cache[scan[idx]] = vp9_pt_energy_class[token];
pt = get_coef_context(scan, nb, pad, token_cache, idx + 1, l);
pt = get_coef_context(nb, token_cache, idx + 1);
token_cache[scan[idx]] = bak;
return pt;
}
@ -141,7 +140,7 @@ static void optimize_b(VP9_COMMON *const cm, MACROBLOCK *mb,
int best, band, pt;
PLANE_TYPE type = xd->plane[plane].plane_type;
int err_mult = plane_rd_mult[type];
int default_eob, pad;
int default_eob;
const int16_t *scan, *nb;
const int mul = 1 + (tx_size == TX_32X32);
uint8_t token_cache[1024];
@ -201,7 +200,7 @@ static void optimize_b(VP9_COMMON *const cm, MACROBLOCK *mb,
for (i = 0; i < eob; i++)
token_cache[scan[i]] = vp9_pt_energy_class[vp9_dct_value_tokens_ptr[
qcoeff_ptr[scan[i]]].token];
nb = vp9_get_coef_neighbors_handle(scan, &pad);
nb = vp9_get_coef_neighbors_handle(scan);
for (i = eob; i-- > i0;) {
int base_bits, d2, dx;
@ -220,8 +219,7 @@ static void optimize_b(VP9_COMMON *const cm, MACROBLOCK *mb,
/* Consider both possible successor states. */
if (next < default_eob) {
band = get_coef_band(band_translate, i + 1);
pt = trellis_get_coeff_context(scan, nb, i, t0, token_cache,
pad, default_eob);
pt = trellis_get_coeff_context(scan, nb, i, t0, token_cache);
rate0 +=
mb->token_costs[tx_size][type][ref][0][band][pt]
[tokens[next][0].token];
@ -273,14 +271,12 @@ static void optimize_b(VP9_COMMON *const cm, MACROBLOCK *mb,
if (next < default_eob) {
band = get_coef_band(band_translate, i + 1);
if (t0 != DCT_EOB_TOKEN) {
pt = trellis_get_coeff_context(scan, nb, i, t0, token_cache,
pad, default_eob);
pt = trellis_get_coeff_context(scan, nb, i, t0, token_cache);
rate0 += mb->token_costs[tx_size][type][ref][!x][band][pt]
[tokens[next][0].token];
}
if (t1 != DCT_EOB_TOKEN) {
pt = trellis_get_coeff_context(scan, nb, i, t1, token_cache,
pad, default_eob);
pt = trellis_get_coeff_context(scan, nb, i, t1, token_cache);
rate1 += mb->token_costs[tx_size][type][ref][!x][band][pt]
[tokens[next][1].token];
}

View File

@ -304,7 +304,7 @@ static INLINE int cost_coeffs(VP9_COMMON *const cm, MACROBLOCK *mb,
MB_MODE_INFO *mbmi = &xd->mode_info_context->mbmi;
int pt;
int c = 0;
int cost = 0, pad;
int cost = 0;
const int16_t *scan, *nb;
const int eob = xd->plane[plane].eobs[block];
const int16_t *qcoeff_ptr = BLOCK_OFFSET(xd->plane[plane].qcoeff, block, 16);
@ -314,7 +314,7 @@ static INLINE int cost_coeffs(VP9_COMMON *const cm, MACROBLOCK *mb,
ENTROPY_CONTEXT above_ec, left_ec;
TX_TYPE tx_type = DCT_DCT;
const int segment_id = xd->mode_info_context->mbmi.segment_id;
int seg_eob, default_eob;
int seg_eob;
uint8_t token_cache[1024];
const uint8_t * band_translate;
@ -372,8 +372,7 @@ static INLINE int cost_coeffs(VP9_COMMON *const cm, MACROBLOCK *mb,
assert(eob <= seg_eob);
pt = combine_entropy_contexts(above_ec, left_ec);
nb = vp9_get_coef_neighbors_handle(scan, &pad);
default_eob = seg_eob;
nb = vp9_get_coef_neighbors_handle(scan);
if (vp9_segfeature_active(xd, segment_id, SEG_LVL_SKIP))
seg_eob = 0;
@ -402,7 +401,7 @@ static INLINE int cost_coeffs(VP9_COMMON *const cm, MACROBLOCK *mb,
v = qcoeff_ptr[rc];
t = vp9_dct_value_tokens_ptr[v].token;
pt = get_coef_context(scan, nb, pad, token_cache, c, default_eob);
pt = get_coef_context(nb, token_cache, c);
cost += token_costs[!prev_t][band][pt][t] + vp9_dct_value_cost_ptr[v];
token_cache[rc] = vp9_pt_energy_class[t];
prev_t = t;
@ -410,7 +409,7 @@ static INLINE int cost_coeffs(VP9_COMMON *const cm, MACROBLOCK *mb,
// eob token
if (c < seg_eob) {
pt = get_coef_context(scan, nb, pad, token_cache, c, default_eob);
pt = get_coef_context(nb, token_cache, c);
cost += token_costs[0][get_coef_band(band_translate, c)][pt]
[DCT_EOB_TOKEN];
}

View File

@ -123,7 +123,7 @@ static void tokenize_b(int plane, int block, BLOCK_SIZE_TYPE bsize,
const int loff = (off >> mod) << tx_size;
ENTROPY_CONTEXT *A = xd->plane[plane].above_context + aoff;
ENTROPY_CONTEXT *L = xd->plane[plane].left_context + loff;
int seg_eob, default_eob, pad;
int seg_eob;
const int segment_id = mbmi->segment_id;
const int16_t *scan, *nb;
vp9_coeff_count *counts;
@ -178,8 +178,7 @@ static void tokenize_b(int plane, int block, BLOCK_SIZE_TYPE bsize,
}
pt = combine_entropy_contexts(above_ec, left_ec);
nb = vp9_get_coef_neighbors_handle(scan, &pad);
default_eob = seg_eob;
nb = vp9_get_coef_neighbors_handle(scan);
if (vp9_segfeature_active(xd, segment_id, SEG_LVL_SKIP))
seg_eob = 0;
@ -191,7 +190,7 @@ static void tokenize_b(int plane, int block, BLOCK_SIZE_TYPE bsize,
int v = 0;
rc = scan[c];
if (c)
pt = get_coef_context(scan, nb, pad, token_cache, c, default_eob);
pt = get_coef_context(nb, token_cache, c);
if (c < eob) {
v = qcoeff_ptr[rc];
assert(-DCT_MAX_VALUE <= v && v < DCT_MAX_VALUE);