diff --git a/vp9/common/vp9_entropy.c b/vp9/common/vp9_entropy.c index ca8b87985..f5d5c1aee 100644 --- a/vp9/common/vp9_entropy.c +++ b/vp9/common/vp9_entropy.c @@ -461,25 +461,25 @@ void vp9_default_coef_probs(VP9_COMMON *pc) { // for each position in raster scan order. // -1 indicates the neighbor does not exist. DECLARE_ALIGNED(16, int16_t, - vp9_default_scan_4x4_neighbors[16 * MAX_NEIGHBORS]); + vp9_default_scan_4x4_neighbors[17 * MAX_NEIGHBORS]); DECLARE_ALIGNED(16, int16_t, - vp9_col_scan_4x4_neighbors[16 * MAX_NEIGHBORS]); + vp9_col_scan_4x4_neighbors[17 * MAX_NEIGHBORS]); DECLARE_ALIGNED(16, int16_t, - vp9_row_scan_4x4_neighbors[16 * MAX_NEIGHBORS]); + vp9_row_scan_4x4_neighbors[17 * MAX_NEIGHBORS]); DECLARE_ALIGNED(16, int16_t, - vp9_col_scan_8x8_neighbors[64 * MAX_NEIGHBORS]); + vp9_col_scan_8x8_neighbors[65 * MAX_NEIGHBORS]); DECLARE_ALIGNED(16, int16_t, - vp9_row_scan_8x8_neighbors[64 * MAX_NEIGHBORS]); + vp9_row_scan_8x8_neighbors[65 * MAX_NEIGHBORS]); DECLARE_ALIGNED(16, int16_t, - vp9_default_scan_8x8_neighbors[64 * MAX_NEIGHBORS]); + vp9_default_scan_8x8_neighbors[65 * MAX_NEIGHBORS]); DECLARE_ALIGNED(16, int16_t, - vp9_col_scan_16x16_neighbors[256 * MAX_NEIGHBORS]); + vp9_col_scan_16x16_neighbors[257 * MAX_NEIGHBORS]); DECLARE_ALIGNED(16, int16_t, - vp9_row_scan_16x16_neighbors[256 * MAX_NEIGHBORS]); + vp9_row_scan_16x16_neighbors[257 * MAX_NEIGHBORS]); DECLARE_ALIGNED(16, int16_t, - vp9_default_scan_16x16_neighbors[256 * MAX_NEIGHBORS]); + vp9_default_scan_16x16_neighbors[257 * MAX_NEIGHBORS]); DECLARE_ALIGNED(16, int16_t, - vp9_default_scan_32x32_neighbors[1024 * MAX_NEIGHBORS]); + vp9_default_scan_32x32_neighbors[1025 * MAX_NEIGHBORS]); DECLARE_ALIGNED(16, int16_t, vp9_default_iscan_4x4[16]); DECLARE_ALIGNED(16, int16_t, vp9_col_iscan_4x4[16]); @@ -504,15 +504,17 @@ static int find_in_scan(const int16_t *scan, int l, int idx) { } static void init_scan_neighbors(const int16_t *scan, int16_t *iscan, - int l, int16_t *neighbors, - int max_neighbors) { + int l, int16_t *neighbors) { int l2 = l * l; int n, i, j; - for (n = 0; n < l2; n++) { + // dc doesn't use this type of prediction + neighbors[MAX_NEIGHBORS * 0 + 0] = 0; + neighbors[MAX_NEIGHBORS * 0 + 1] = 0; + iscan[0] = find_in_scan(scan, l, 0); + for (n = 1; n < l2; n++) { int rc = scan[n]; iscan[n] = find_in_scan(scan, l, n); - assert(max_neighbors == MAX_NEIGHBORS); i = rc / l; j = rc % l; if (i > 0 && j > 0) { @@ -524,93 +526,84 @@ static void init_scan_neighbors(const int16_t *scan, // Therefore, if we use ADST/DCT, prefer the DCT neighbor coeff // as a context. If ADST or DCT is used in both directions, we // use the combination of the two as a context. - int a = find_in_scan(scan, l, (i - 1) * l + j); - int b = find_in_scan(scan, l, i * l + j - 1); + int a = (i - 1) * l + j; + int b = i * l + j - 1; if (scan == vp9_col_scan_4x4 || scan == vp9_col_scan_8x8 || scan == vp9_col_scan_16x16) { - neighbors[max_neighbors * n + 0] = a; - neighbors[max_neighbors * n + 1] = -1; + // in the col/row scan cases (as well as left/top edge cases), we set + // both contexts to the same value, so we can branchlessly do a+b+1>>1 + // which automatically becomes a if a == b + neighbors[MAX_NEIGHBORS * n + 0] = + neighbors[MAX_NEIGHBORS * n + 1] = a; } else if (scan == vp9_row_scan_4x4 || scan == vp9_row_scan_8x8 || scan == vp9_row_scan_16x16) { - neighbors[max_neighbors * n + 0] = b; - neighbors[max_neighbors * n + 1] = -1; + neighbors[MAX_NEIGHBORS * n + 0] = + neighbors[MAX_NEIGHBORS * n + 1] = b; } else { - neighbors[max_neighbors * n + 0] = a; - neighbors[max_neighbors * n + 1] = b; + neighbors[MAX_NEIGHBORS * n + 0] = a; + neighbors[MAX_NEIGHBORS * n + 1] = b; } } else if (i > 0) { - neighbors[max_neighbors * n + 0] = find_in_scan(scan, l, (i - 1) * l + j); - neighbors[max_neighbors * n + 1] = -1; - } else if (j > 0) { - neighbors[max_neighbors * n + 0] = - find_in_scan(scan, l, i * l + j - 1); - neighbors[max_neighbors * n + 1] = -1; + neighbors[MAX_NEIGHBORS * n + 0] = + neighbors[MAX_NEIGHBORS * n + 1] = (i - 1) * l + j; } else { - assert(n == 0); - // dc predictor doesn't use previous tokens - neighbors[max_neighbors * n + 0] = -1; + assert(j > 0); + neighbors[MAX_NEIGHBORS * n + 0] = + neighbors[MAX_NEIGHBORS * n + 1] = i * l + j - 1; } - assert(neighbors[max_neighbors * n + 0] < n); + assert(iscan[neighbors[MAX_NEIGHBORS * n + 0]] < n); } + // one padding item so we don't have to add branches in code to handle + // calls to get_coef_context() for the token after the final dc token + neighbors[MAX_NEIGHBORS * l2 + 0] = 0; + neighbors[MAX_NEIGHBORS * l2 + 1] = 0; } void vp9_init_neighbors() { init_scan_neighbors(vp9_default_scan_4x4, vp9_default_iscan_4x4, 4, - vp9_default_scan_4x4_neighbors, MAX_NEIGHBORS); + vp9_default_scan_4x4_neighbors); init_scan_neighbors(vp9_row_scan_4x4, vp9_row_iscan_4x4, 4, - vp9_row_scan_4x4_neighbors, MAX_NEIGHBORS); + vp9_row_scan_4x4_neighbors); init_scan_neighbors(vp9_col_scan_4x4, vp9_col_iscan_4x4, 4, - vp9_col_scan_4x4_neighbors, MAX_NEIGHBORS); + vp9_col_scan_4x4_neighbors); init_scan_neighbors(vp9_default_scan_8x8, vp9_default_iscan_8x8, 8, - vp9_default_scan_8x8_neighbors, MAX_NEIGHBORS); + vp9_default_scan_8x8_neighbors); init_scan_neighbors(vp9_row_scan_8x8, vp9_row_iscan_8x8, 8, - vp9_row_scan_8x8_neighbors, MAX_NEIGHBORS); + vp9_row_scan_8x8_neighbors); init_scan_neighbors(vp9_col_scan_8x8, vp9_col_iscan_8x8, 8, - vp9_col_scan_8x8_neighbors, MAX_NEIGHBORS); + vp9_col_scan_8x8_neighbors); init_scan_neighbors(vp9_default_scan_16x16, vp9_default_iscan_16x16, 16, - vp9_default_scan_16x16_neighbors, MAX_NEIGHBORS); + vp9_default_scan_16x16_neighbors); init_scan_neighbors(vp9_row_scan_16x16, vp9_row_iscan_16x16, 16, - vp9_row_scan_16x16_neighbors, MAX_NEIGHBORS); + vp9_row_scan_16x16_neighbors); init_scan_neighbors(vp9_col_scan_16x16, vp9_col_iscan_16x16, 16, - vp9_col_scan_16x16_neighbors, MAX_NEIGHBORS); + vp9_col_scan_16x16_neighbors); init_scan_neighbors(vp9_default_scan_32x32, vp9_default_iscan_32x32, 32, - vp9_default_scan_32x32_neighbors, MAX_NEIGHBORS); + vp9_default_scan_32x32_neighbors); } -const int16_t *vp9_get_coef_neighbors_handle(const int16_t *scan, int *pad) { +const int16_t *vp9_get_coef_neighbors_handle(const int16_t *scan) { if (scan == vp9_default_scan_4x4) { - *pad = MAX_NEIGHBORS; return vp9_default_scan_4x4_neighbors; } else if (scan == vp9_row_scan_4x4) { - *pad = MAX_NEIGHBORS; return vp9_row_scan_4x4_neighbors; } else if (scan == vp9_col_scan_4x4) { - *pad = MAX_NEIGHBORS; return vp9_col_scan_4x4_neighbors; } else if (scan == vp9_default_scan_8x8) { - *pad = MAX_NEIGHBORS; return vp9_default_scan_8x8_neighbors; } else if (scan == vp9_row_scan_8x8) { - *pad = 2; return vp9_row_scan_8x8_neighbors; } else if (scan == vp9_col_scan_8x8) { - *pad = 2; return vp9_col_scan_8x8_neighbors; } else if (scan == vp9_default_scan_16x16) { - *pad = MAX_NEIGHBORS; return vp9_default_scan_16x16_neighbors; } else if (scan == vp9_row_scan_16x16) { - *pad = 2; return vp9_row_scan_16x16_neighbors; } else if (scan == vp9_col_scan_16x16) { - *pad = 2; return vp9_col_scan_16x16_neighbors; - } else if (scan == vp9_default_scan_32x32) { - *pad = MAX_NEIGHBORS; - return vp9_default_scan_32x32_neighbors; } else { - assert(0); - return NULL; + assert(scan == vp9_default_scan_32x32); + return vp9_default_scan_32x32_neighbors; } } diff --git a/vp9/common/vp9_entropy.h b/vp9/common/vp9_entropy.h index 5937efa3a..68c36eaef 100644 --- a/vp9/common/vp9_entropy.h +++ b/vp9/common/vp9_entropy.h @@ -166,28 +166,14 @@ static int get_coef_band(const uint8_t * band_translate, int coef_index) { } #define MAX_NEIGHBORS 2 -static INLINE int get_coef_context(const int16_t *scan, - const int16_t *neighbors, - int nb_pad, uint8_t *token_cache, - int c, int l) { - int eob = l; - assert(nb_pad == MAX_NEIGHBORS); - if (c == eob) { - return 0; - } else { - int ctx; - assert(neighbors[MAX_NEIGHBORS * c + 0] >= 0); - if (neighbors[MAX_NEIGHBORS * c + 1] >= 0) { - ctx = (1 + token_cache[scan[neighbors[MAX_NEIGHBORS * c + 0]]] + - token_cache[scan[neighbors[MAX_NEIGHBORS * c + 1]]]) >> 1; - } else { - ctx = token_cache[scan[neighbors[MAX_NEIGHBORS * c + 0]]]; - } - return ctx; - } +static INLINE int get_coef_context(const int16_t *neighbors, + uint8_t *token_cache, + int c) { + return (1 + token_cache[neighbors[MAX_NEIGHBORS * c + 0]] + + token_cache[neighbors[MAX_NEIGHBORS * c + 1]]) >> 1; } -const int16_t *vp9_get_coef_neighbors_handle(const int16_t *scan, int *pad); +const int16_t *vp9_get_coef_neighbors_handle(const int16_t *scan); // 128 lists of probabilities are stored for the following ONE node probs: diff --git a/vp9/decoder/vp9_detokenize.c b/vp9/decoder/vp9_detokenize.c index 81403a466..76889c477 100644 --- a/vp9/decoder/vp9_detokenize.c +++ b/vp9/decoder/vp9_detokenize.c @@ -97,7 +97,7 @@ static int decode_coefs(FRAME_CONTEXT *fc, const MACROBLOCKD *xd, TX_SIZE txfm_size, const int16_t *dq, ENTROPY_CONTEXT *A, ENTROPY_CONTEXT *L) { ENTROPY_CONTEXT above_ec, left_ec; - int pt, c = 0, pad, default_eob; + int pt, c = 0; int band; vp9_prob (*coef_probs)[PREV_COEF_CONTEXTS][UNCONSTRAINED_NODES]; vp9_prob coef_probs_full[COEF_BANDS][PREV_COEF_CONTEXTS][ENTROPY_NODES]; @@ -130,7 +130,6 @@ static int decode_coefs(FRAME_CONTEXT *fc, const MACROBLOCKD *xd, scan = get_scan_4x4(tx_type); above_ec = A[0] != 0; left_ec = L[0] != 0; - default_eob = 16; band_translate = vp9_coefband_trans_4x4; break; } @@ -140,7 +139,6 @@ static int decode_coefs(FRAME_CONTEXT *fc, const MACROBLOCKD *xd, scan = get_scan_8x8(tx_type); above_ec = (A[0] + A[1]) != 0; left_ec = (L[0] + L[1]) != 0; - default_eob = 64; band_translate = vp9_coefband_trans_8x8plus; break; } @@ -150,7 +148,6 @@ static int decode_coefs(FRAME_CONTEXT *fc, const MACROBLOCKD *xd, scan = get_scan_16x16(tx_type); above_ec = (A[0] + A[1] + A[2] + A[3]) != 0; left_ec = (L[0] + L[1] + L[2] + L[3]) != 0; - default_eob = 256; band_translate = vp9_coefband_trans_8x8plus; break; } @@ -158,13 +155,12 @@ static int decode_coefs(FRAME_CONTEXT *fc, const MACROBLOCKD *xd, scan = vp9_default_scan_32x32; above_ec = (A[0] + A[1] + A[2] + A[3] + A[4] + A[5] + A[6] + A[7]) != 0; left_ec = (L[0] + L[1] + L[2] + L[3] + L[4] + L[5] + L[6] + L[7]) != 0; - default_eob = 1024; band_translate = vp9_coefband_trans_8x8plus; break; } pt = combine_entropy_contexts(above_ec, left_ec); - nb = vp9_get_coef_neighbors_handle(scan, &pad); + nb = vp9_get_coef_neighbors_handle(scan); while (1) { int val; @@ -172,8 +168,7 @@ static int decode_coefs(FRAME_CONTEXT *fc, const MACROBLOCKD *xd, if (c >= seg_eob) break; if (c) - pt = get_coef_context(scan, nb, pad, token_cache, - c, default_eob); + pt = get_coef_context(nb, token_cache, c); band = get_coef_band(band_translate, c); prob = coef_probs[band][pt]; #if !CONFIG_BALANCED_COEFTREE @@ -186,8 +181,7 @@ SKIP_START: if (c >= seg_eob) break; if (c) - pt = get_coef_context(scan, nb, pad, token_cache, - c, default_eob); + pt = get_coef_context(nb, token_cache, c); band = get_coef_band(band_translate, c); prob = coef_probs[band][pt]; diff --git a/vp9/encoder/vp9_encodemb.c b/vp9/encoder/vp9_encodemb.c index ccc3d12e6..d424f47f4 100644 --- a/vp9/encoder/vp9_encodemb.c +++ b/vp9/encoder/vp9_encodemb.c @@ -112,11 +112,10 @@ static const int plane_rd_mult[4] = { static int trellis_get_coeff_context(const int16_t *scan, const int16_t *nb, int idx, int token, - uint8_t *token_cache, - int pad, int l) { + uint8_t *token_cache) { int bak = token_cache[scan[idx]], pt; token_cache[scan[idx]] = vp9_pt_energy_class[token]; - pt = get_coef_context(scan, nb, pad, token_cache, idx + 1, l); + pt = get_coef_context(nb, token_cache, idx + 1); token_cache[scan[idx]] = bak; return pt; } @@ -141,7 +140,7 @@ static void optimize_b(VP9_COMMON *const cm, MACROBLOCK *mb, int best, band, pt; PLANE_TYPE type = xd->plane[plane].plane_type; int err_mult = plane_rd_mult[type]; - int default_eob, pad; + int default_eob; const int16_t *scan, *nb; const int mul = 1 + (tx_size == TX_32X32); uint8_t token_cache[1024]; @@ -201,7 +200,7 @@ static void optimize_b(VP9_COMMON *const cm, MACROBLOCK *mb, for (i = 0; i < eob; i++) token_cache[scan[i]] = vp9_pt_energy_class[vp9_dct_value_tokens_ptr[ qcoeff_ptr[scan[i]]].token]; - nb = vp9_get_coef_neighbors_handle(scan, &pad); + nb = vp9_get_coef_neighbors_handle(scan); for (i = eob; i-- > i0;) { int base_bits, d2, dx; @@ -220,8 +219,7 @@ static void optimize_b(VP9_COMMON *const cm, MACROBLOCK *mb, /* Consider both possible successor states. */ if (next < default_eob) { band = get_coef_band(band_translate, i + 1); - pt = trellis_get_coeff_context(scan, nb, i, t0, token_cache, - pad, default_eob); + pt = trellis_get_coeff_context(scan, nb, i, t0, token_cache); rate0 += mb->token_costs[tx_size][type][ref][0][band][pt] [tokens[next][0].token]; @@ -273,14 +271,12 @@ static void optimize_b(VP9_COMMON *const cm, MACROBLOCK *mb, if (next < default_eob) { band = get_coef_band(band_translate, i + 1); if (t0 != DCT_EOB_TOKEN) { - pt = trellis_get_coeff_context(scan, nb, i, t0, token_cache, - pad, default_eob); + pt = trellis_get_coeff_context(scan, nb, i, t0, token_cache); rate0 += mb->token_costs[tx_size][type][ref][!x][band][pt] [tokens[next][0].token]; } if (t1 != DCT_EOB_TOKEN) { - pt = trellis_get_coeff_context(scan, nb, i, t1, token_cache, - pad, default_eob); + pt = trellis_get_coeff_context(scan, nb, i, t1, token_cache); rate1 += mb->token_costs[tx_size][type][ref][!x][band][pt] [tokens[next][1].token]; } diff --git a/vp9/encoder/vp9_rdopt.c b/vp9/encoder/vp9_rdopt.c index 77272f0cd..22a2c4121 100644 --- a/vp9/encoder/vp9_rdopt.c +++ b/vp9/encoder/vp9_rdopt.c @@ -304,7 +304,7 @@ static INLINE int cost_coeffs(VP9_COMMON *const cm, MACROBLOCK *mb, MB_MODE_INFO *mbmi = &xd->mode_info_context->mbmi; int pt; int c = 0; - int cost = 0, pad; + int cost = 0; const int16_t *scan, *nb; const int eob = xd->plane[plane].eobs[block]; const int16_t *qcoeff_ptr = BLOCK_OFFSET(xd->plane[plane].qcoeff, block, 16); @@ -314,7 +314,7 @@ static INLINE int cost_coeffs(VP9_COMMON *const cm, MACROBLOCK *mb, ENTROPY_CONTEXT above_ec, left_ec; TX_TYPE tx_type = DCT_DCT; const int segment_id = xd->mode_info_context->mbmi.segment_id; - int seg_eob, default_eob; + int seg_eob; uint8_t token_cache[1024]; const uint8_t * band_translate; @@ -372,8 +372,7 @@ static INLINE int cost_coeffs(VP9_COMMON *const cm, MACROBLOCK *mb, assert(eob <= seg_eob); pt = combine_entropy_contexts(above_ec, left_ec); - nb = vp9_get_coef_neighbors_handle(scan, &pad); - default_eob = seg_eob; + nb = vp9_get_coef_neighbors_handle(scan); if (vp9_segfeature_active(xd, segment_id, SEG_LVL_SKIP)) seg_eob = 0; @@ -402,7 +401,7 @@ static INLINE int cost_coeffs(VP9_COMMON *const cm, MACROBLOCK *mb, v = qcoeff_ptr[rc]; t = vp9_dct_value_tokens_ptr[v].token; - pt = get_coef_context(scan, nb, pad, token_cache, c, default_eob); + pt = get_coef_context(nb, token_cache, c); cost += token_costs[!prev_t][band][pt][t] + vp9_dct_value_cost_ptr[v]; token_cache[rc] = vp9_pt_energy_class[t]; prev_t = t; @@ -410,7 +409,7 @@ static INLINE int cost_coeffs(VP9_COMMON *const cm, MACROBLOCK *mb, // eob token if (c < seg_eob) { - pt = get_coef_context(scan, nb, pad, token_cache, c, default_eob); + pt = get_coef_context(nb, token_cache, c); cost += token_costs[0][get_coef_band(band_translate, c)][pt] [DCT_EOB_TOKEN]; } diff --git a/vp9/encoder/vp9_tokenize.c b/vp9/encoder/vp9_tokenize.c index 91c2a14d1..ee129a060 100644 --- a/vp9/encoder/vp9_tokenize.c +++ b/vp9/encoder/vp9_tokenize.c @@ -123,7 +123,7 @@ static void tokenize_b(int plane, int block, BLOCK_SIZE_TYPE bsize, const int loff = (off >> mod) << tx_size; ENTROPY_CONTEXT *A = xd->plane[plane].above_context + aoff; ENTROPY_CONTEXT *L = xd->plane[plane].left_context + loff; - int seg_eob, default_eob, pad; + int seg_eob; const int segment_id = mbmi->segment_id; const int16_t *scan, *nb; vp9_coeff_count *counts; @@ -178,8 +178,7 @@ static void tokenize_b(int plane, int block, BLOCK_SIZE_TYPE bsize, } pt = combine_entropy_contexts(above_ec, left_ec); - nb = vp9_get_coef_neighbors_handle(scan, &pad); - default_eob = seg_eob; + nb = vp9_get_coef_neighbors_handle(scan); if (vp9_segfeature_active(xd, segment_id, SEG_LVL_SKIP)) seg_eob = 0; @@ -191,7 +190,7 @@ static void tokenize_b(int plane, int block, BLOCK_SIZE_TYPE bsize, int v = 0; rc = scan[c]; if (c) - pt = get_coef_context(scan, nb, pad, token_cache, c, default_eob); + pt = get_coef_context(nb, token_cache, c); if (c < eob) { v = qcoeff_ptr[rc]; assert(-DCT_MAX_VALUE <= v && v < DCT_MAX_VALUE);