token_cache changes in decoder
Removes stack-allocation of token_cache in decode_coefs function. Seems to achieve about 1% decode speed improvement as tested on 25 480p videos. Change-Id: I8e7eb3361fa09d9654dfad0677a6d606701fdc6e
This commit is contained in:
@@ -41,6 +41,7 @@ typedef struct TileWorkerData {
|
|||||||
VP9_COMMON *cm;
|
VP9_COMMON *cm;
|
||||||
vp9_reader bit_reader;
|
vp9_reader bit_reader;
|
||||||
DECLARE_ALIGNED(16, MACROBLOCKD, xd);
|
DECLARE_ALIGNED(16, MACROBLOCKD, xd);
|
||||||
|
DECLARE_ALIGNED(16, unsigned char, token_cache[1024]);
|
||||||
} TileWorkerData;
|
} TileWorkerData;
|
||||||
|
|
||||||
static int read_be32(const uint8_t *p) {
|
static int read_be32(const uint8_t *p) {
|
||||||
@@ -297,6 +298,7 @@ struct intra_args {
|
|||||||
VP9_COMMON *cm;
|
VP9_COMMON *cm;
|
||||||
MACROBLOCKD *xd;
|
MACROBLOCKD *xd;
|
||||||
vp9_reader *r;
|
vp9_reader *r;
|
||||||
|
unsigned char* token_cache;
|
||||||
};
|
};
|
||||||
|
|
||||||
static void predict_and_reconstruct_intra_block(int plane, int block,
|
static void predict_and_reconstruct_intra_block(int plane, int block,
|
||||||
@@ -326,7 +328,7 @@ static void predict_and_reconstruct_intra_block(int plane, int block,
|
|||||||
|
|
||||||
if (!mi->mbmi.skip_coeff) {
|
if (!mi->mbmi.skip_coeff) {
|
||||||
vp9_decode_block_tokens(cm, xd, plane, block, plane_bsize, tx_size,
|
vp9_decode_block_tokens(cm, xd, plane, block, plane_bsize, tx_size,
|
||||||
args->r);
|
args->r, args->token_cache);
|
||||||
inverse_transform_block(xd, plane, block, plane_bsize, tx_size);
|
inverse_transform_block(xd, plane, block, plane_bsize, tx_size);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -336,6 +338,7 @@ struct inter_args {
|
|||||||
MACROBLOCKD *xd;
|
MACROBLOCKD *xd;
|
||||||
vp9_reader *r;
|
vp9_reader *r;
|
||||||
int *eobtotal;
|
int *eobtotal;
|
||||||
|
unsigned char* token_cache;
|
||||||
};
|
};
|
||||||
|
|
||||||
static void reconstruct_inter_block(int plane, int block,
|
static void reconstruct_inter_block(int plane, int block,
|
||||||
@@ -346,7 +349,8 @@ static void reconstruct_inter_block(int plane, int block,
|
|||||||
MACROBLOCKD *const xd = args->xd;
|
MACROBLOCKD *const xd = args->xd;
|
||||||
|
|
||||||
*args->eobtotal += vp9_decode_block_tokens(cm, xd, plane, block,
|
*args->eobtotal += vp9_decode_block_tokens(cm, xd, plane, block,
|
||||||
plane_bsize, tx_size, args->r);
|
plane_bsize, tx_size,
|
||||||
|
args->r, args->token_cache);
|
||||||
inverse_transform_block(xd, plane, block, plane_bsize, tx_size);
|
inverse_transform_block(xd, plane, block, plane_bsize, tx_size);
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -398,7 +402,8 @@ static void set_ref(VP9_COMMON *const cm, MACROBLOCKD *const xd,
|
|||||||
static void decode_modes_b(VP9_COMMON *const cm, MACROBLOCKD *const xd,
|
static void decode_modes_b(VP9_COMMON *const cm, MACROBLOCKD *const xd,
|
||||||
const TileInfo *const tile,
|
const TileInfo *const tile,
|
||||||
int mi_row, int mi_col,
|
int mi_row, int mi_col,
|
||||||
vp9_reader *r, BLOCK_SIZE bsize) {
|
vp9_reader *r, BLOCK_SIZE bsize,
|
||||||
|
unsigned char *token_cache) {
|
||||||
const int less8x8 = bsize < BLOCK_8X8;
|
const int less8x8 = bsize < BLOCK_8X8;
|
||||||
MB_MODE_INFO *mbmi;
|
MB_MODE_INFO *mbmi;
|
||||||
|
|
||||||
@@ -420,7 +425,7 @@ static void decode_modes_b(VP9_COMMON *const cm, MACROBLOCKD *const xd,
|
|||||||
}
|
}
|
||||||
|
|
||||||
if (!is_inter_block(mbmi)) {
|
if (!is_inter_block(mbmi)) {
|
||||||
struct intra_args arg = { cm, xd, r };
|
struct intra_args arg = { cm, xd, r, token_cache };
|
||||||
foreach_transformed_block(xd, bsize, predict_and_reconstruct_intra_block,
|
foreach_transformed_block(xd, bsize, predict_and_reconstruct_intra_block,
|
||||||
&arg);
|
&arg);
|
||||||
} else {
|
} else {
|
||||||
@@ -438,7 +443,7 @@ static void decode_modes_b(VP9_COMMON *const cm, MACROBLOCKD *const xd,
|
|||||||
// Reconstruction
|
// Reconstruction
|
||||||
if (!mbmi->skip_coeff) {
|
if (!mbmi->skip_coeff) {
|
||||||
int eobtotal = 0;
|
int eobtotal = 0;
|
||||||
struct inter_args arg = { cm, xd, r, &eobtotal };
|
struct inter_args arg = { cm, xd, r, &eobtotal, token_cache };
|
||||||
foreach_transformed_block(xd, bsize, reconstruct_inter_block, &arg);
|
foreach_transformed_block(xd, bsize, reconstruct_inter_block, &arg);
|
||||||
if (!less8x8 && eobtotal == 0)
|
if (!less8x8 && eobtotal == 0)
|
||||||
mbmi->skip_coeff = 1; // skip loopfilter
|
mbmi->skip_coeff = 1; // skip loopfilter
|
||||||
@@ -477,7 +482,8 @@ static PARTITION_TYPE read_partition(VP9_COMMON *cm, MACROBLOCKD *xd, int hbs,
|
|||||||
static void decode_modes_sb(VP9_COMMON *const cm, MACROBLOCKD *const xd,
|
static void decode_modes_sb(VP9_COMMON *const cm, MACROBLOCKD *const xd,
|
||||||
const TileInfo *const tile,
|
const TileInfo *const tile,
|
||||||
int mi_row, int mi_col,
|
int mi_row, int mi_col,
|
||||||
vp9_reader* r, BLOCK_SIZE bsize) {
|
vp9_reader* r, BLOCK_SIZE bsize,
|
||||||
|
unsigned char *token_cache) {
|
||||||
const int hbs = num_8x8_blocks_wide_lookup[bsize] / 2;
|
const int hbs = num_8x8_blocks_wide_lookup[bsize] / 2;
|
||||||
PARTITION_TYPE partition;
|
PARTITION_TYPE partition;
|
||||||
BLOCK_SIZE subsize;
|
BLOCK_SIZE subsize;
|
||||||
@@ -488,27 +494,33 @@ static void decode_modes_sb(VP9_COMMON *const cm, MACROBLOCKD *const xd,
|
|||||||
partition = read_partition(cm, xd, hbs, mi_row, mi_col, bsize, r);
|
partition = read_partition(cm, xd, hbs, mi_row, mi_col, bsize, r);
|
||||||
subsize = get_subsize(bsize, partition);
|
subsize = get_subsize(bsize, partition);
|
||||||
if (subsize < BLOCK_8X8) {
|
if (subsize < BLOCK_8X8) {
|
||||||
decode_modes_b(cm, xd, tile, mi_row, mi_col, r, subsize);
|
decode_modes_b(cm, xd, tile, mi_row, mi_col, r, subsize, token_cache);
|
||||||
} else {
|
} else {
|
||||||
switch (partition) {
|
switch (partition) {
|
||||||
case PARTITION_NONE:
|
case PARTITION_NONE:
|
||||||
decode_modes_b(cm, xd, tile, mi_row, mi_col, r, subsize);
|
decode_modes_b(cm, xd, tile, mi_row, mi_col, r, subsize, token_cache);
|
||||||
break;
|
break;
|
||||||
case PARTITION_HORZ:
|
case PARTITION_HORZ:
|
||||||
decode_modes_b(cm, xd, tile, mi_row, mi_col, r, subsize);
|
decode_modes_b(cm, xd, tile, mi_row, mi_col, r, subsize, token_cache);
|
||||||
if (mi_row + hbs < cm->mi_rows)
|
if (mi_row + hbs < cm->mi_rows)
|
||||||
decode_modes_b(cm, xd, tile, mi_row + hbs, mi_col, r, subsize);
|
decode_modes_b(cm, xd, tile, mi_row + hbs, mi_col, r, subsize,
|
||||||
|
token_cache);
|
||||||
break;
|
break;
|
||||||
case PARTITION_VERT:
|
case PARTITION_VERT:
|
||||||
decode_modes_b(cm, xd, tile, mi_row, mi_col, r, subsize);
|
decode_modes_b(cm, xd, tile, mi_row, mi_col, r, subsize, token_cache);
|
||||||
if (mi_col + hbs < cm->mi_cols)
|
if (mi_col + hbs < cm->mi_cols)
|
||||||
decode_modes_b(cm, xd, tile, mi_row, mi_col + hbs, r, subsize);
|
decode_modes_b(cm, xd, tile, mi_row, mi_col + hbs, r, subsize,
|
||||||
|
token_cache);
|
||||||
break;
|
break;
|
||||||
case PARTITION_SPLIT:
|
case PARTITION_SPLIT:
|
||||||
decode_modes_sb(cm, xd, tile, mi_row, mi_col, r, subsize);
|
decode_modes_sb(cm, xd, tile, mi_row, mi_col, r, subsize,
|
||||||
decode_modes_sb(cm, xd, tile, mi_row, mi_col + hbs, r, subsize);
|
token_cache);
|
||||||
decode_modes_sb(cm, xd, tile, mi_row + hbs, mi_col, r, subsize);
|
decode_modes_sb(cm, xd, tile, mi_row, mi_col + hbs, r, subsize,
|
||||||
decode_modes_sb(cm, xd, tile, mi_row + hbs, mi_col + hbs, r, subsize);
|
token_cache);
|
||||||
|
decode_modes_sb(cm, xd, tile, mi_row + hbs, mi_col, r, subsize,
|
||||||
|
token_cache);
|
||||||
|
decode_modes_sb(cm, xd, tile, mi_row + hbs, mi_col + hbs, r, subsize,
|
||||||
|
token_cache);
|
||||||
break;
|
break;
|
||||||
default:
|
default:
|
||||||
assert(!"Invalid partition type");
|
assert(!"Invalid partition type");
|
||||||
@@ -791,7 +803,8 @@ static void decode_tile(VP9D_COMP *pbi, const TileInfo *const tile,
|
|||||||
vp9_zero(xd->left_seg_context);
|
vp9_zero(xd->left_seg_context);
|
||||||
for (mi_col = tile->mi_col_start; mi_col < tile->mi_col_end;
|
for (mi_col = tile->mi_col_start; mi_col < tile->mi_col_end;
|
||||||
mi_col += MI_BLOCK_SIZE)
|
mi_col += MI_BLOCK_SIZE)
|
||||||
decode_modes_sb(cm, xd, tile, mi_row, mi_col, r, BLOCK_64X64);
|
decode_modes_sb(cm, xd, tile, mi_row, mi_col, r, BLOCK_64X64,
|
||||||
|
pbi->token_cache);
|
||||||
|
|
||||||
if (pbi->do_loopfilter_inline) {
|
if (pbi->do_loopfilter_inline) {
|
||||||
const int lf_start = mi_row - MI_BLOCK_SIZE;
|
const int lf_start = mi_row - MI_BLOCK_SIZE;
|
||||||
@@ -935,7 +948,7 @@ static const uint8_t *decode_tiles(VP9D_COMP *pbi, const uint8_t *data) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
static int tile_worker_hook(void *arg1, void *arg2) {
|
static int tile_worker_hook(void *arg1, void *arg2) {
|
||||||
TileWorkerData *const tile_data = (TileWorkerData*)arg1;
|
TileWorkerData *tile_data = (TileWorkerData*)arg1;
|
||||||
const TileInfo *const tile = (TileInfo*)arg2;
|
const TileInfo *const tile = (TileInfo*)arg2;
|
||||||
int mi_row, mi_col;
|
int mi_row, mi_col;
|
||||||
|
|
||||||
@@ -944,9 +957,11 @@ static int tile_worker_hook(void *arg1, void *arg2) {
|
|||||||
vp9_zero(tile_data->xd.left_context);
|
vp9_zero(tile_data->xd.left_context);
|
||||||
vp9_zero(tile_data->xd.left_seg_context);
|
vp9_zero(tile_data->xd.left_seg_context);
|
||||||
for (mi_col = tile->mi_col_start; mi_col < tile->mi_col_end;
|
for (mi_col = tile->mi_col_start; mi_col < tile->mi_col_end;
|
||||||
mi_col += MI_BLOCK_SIZE)
|
mi_col += MI_BLOCK_SIZE) {
|
||||||
decode_modes_sb(tile_data->cm, &tile_data->xd, tile,
|
decode_modes_sb(tile_data->cm, &tile_data->xd, tile,
|
||||||
mi_row, mi_col, &tile_data->bit_reader, BLOCK_64X64);
|
mi_row, mi_col, &tile_data->bit_reader, BLOCK_64X64,
|
||||||
|
tile_data->token_cache);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
return !tile_data->xd.corrupted;
|
return !tile_data->xd.corrupted;
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -70,7 +70,6 @@ static const vp9_prob cat6_prob[15] = {
|
|||||||
DCT_EOB_MODEL_TOKEN : TWO_TOKEN) : \
|
DCT_EOB_MODEL_TOKEN : TWO_TOKEN) : \
|
||||||
token]; \
|
token]; \
|
||||||
} \
|
} \
|
||||||
token_cache[scan[c]] = vp9_pt_energy_class[token]; \
|
|
||||||
} while (0)
|
} while (0)
|
||||||
|
|
||||||
#define WRITE_COEF_CONTINUE(val, token) \
|
#define WRITE_COEF_CONTINUE(val, token) \
|
||||||
@@ -78,6 +77,7 @@ static const vp9_prob cat6_prob[15] = {
|
|||||||
qcoeff_ptr[scan[c]] = vp9_read_and_apply_sign(r, val) * \
|
qcoeff_ptr[scan[c]] = vp9_read_and_apply_sign(r, val) * \
|
||||||
dq[c > 0] / (1 + (tx_size == TX_32X32)); \
|
dq[c > 0] / (1 + (tx_size == TX_32X32)); \
|
||||||
INCREMENT_COUNT(token); \
|
INCREMENT_COUNT(token); \
|
||||||
|
token_cache[scan[c]] = vp9_pt_energy_class[token]; \
|
||||||
c++; \
|
c++; \
|
||||||
continue; \
|
continue; \
|
||||||
}
|
}
|
||||||
@@ -91,7 +91,8 @@ static const vp9_prob cat6_prob[15] = {
|
|||||||
static int decode_coefs(VP9_COMMON *cm, const MACROBLOCKD *xd,
|
static int decode_coefs(VP9_COMMON *cm, const MACROBLOCKD *xd,
|
||||||
vp9_reader *r, int block_idx,
|
vp9_reader *r, int block_idx,
|
||||||
PLANE_TYPE type, int seg_eob, int16_t *qcoeff_ptr,
|
PLANE_TYPE type, int seg_eob, int16_t *qcoeff_ptr,
|
||||||
TX_SIZE tx_size, const int16_t *dq, int pt) {
|
TX_SIZE tx_size, const int16_t *dq, int pt,
|
||||||
|
uint8_t *token_cache) {
|
||||||
const FRAME_CONTEXT *const fc = &cm->fc;
|
const FRAME_CONTEXT *const fc = &cm->fc;
|
||||||
FRAME_COUNTS *const counts = &cm->counts;
|
FRAME_COUNTS *const counts = &cm->counts;
|
||||||
const int ref = is_inter_block(&xd->mi_8x8[0]->mbmi);
|
const int ref = is_inter_block(&xd->mi_8x8[0]->mbmi);
|
||||||
@@ -104,7 +105,6 @@ static int decode_coefs(VP9_COMMON *cm, const MACROBLOCKD *xd,
|
|||||||
vp9_coeff_count_model *coef_counts = counts->coef[tx_size];
|
vp9_coeff_count_model *coef_counts = counts->coef[tx_size];
|
||||||
const int16_t *scan, *nb;
|
const int16_t *scan, *nb;
|
||||||
const uint8_t *const band_translate = get_band_translate(tx_size);
|
const uint8_t *const band_translate = get_band_translate(tx_size);
|
||||||
uint8_t token_cache[1024];
|
|
||||||
get_scan(xd, tx_size, type, block_idx, &scan, &nb);
|
get_scan(xd, tx_size, type, block_idx, &scan, &nb);
|
||||||
|
|
||||||
while (1) {
|
while (1) {
|
||||||
@@ -131,6 +131,7 @@ static int decode_coefs(VP9_COMMON *cm, const MACROBLOCKD *xd,
|
|||||||
|
|
||||||
if (!vp9_read(r, prob[ZERO_CONTEXT_NODE])) {
|
if (!vp9_read(r, prob[ZERO_CONTEXT_NODE])) {
|
||||||
INCREMENT_COUNT(ZERO_TOKEN);
|
INCREMENT_COUNT(ZERO_TOKEN);
|
||||||
|
token_cache[scan[c]] = vp9_pt_energy_class[ZERO_TOKEN];
|
||||||
++c;
|
++c;
|
||||||
goto SKIP_START;
|
goto SKIP_START;
|
||||||
}
|
}
|
||||||
@@ -212,7 +213,8 @@ static int decode_coefs(VP9_COMMON *cm, const MACROBLOCKD *xd,
|
|||||||
|
|
||||||
int vp9_decode_block_tokens(VP9_COMMON *cm, MACROBLOCKD *xd,
|
int vp9_decode_block_tokens(VP9_COMMON *cm, MACROBLOCKD *xd,
|
||||||
int plane, int block, BLOCK_SIZE plane_bsize,
|
int plane, int block, BLOCK_SIZE plane_bsize,
|
||||||
TX_SIZE tx_size, vp9_reader *r) {
|
TX_SIZE tx_size, vp9_reader *r,
|
||||||
|
uint8_t *token_cache) {
|
||||||
struct macroblockd_plane *const pd = &xd->plane[plane];
|
struct macroblockd_plane *const pd = &xd->plane[plane];
|
||||||
const int seg_eob = get_tx_eob(&cm->seg, xd->mi_8x8[0]->mbmi.segment_id,
|
const int seg_eob = get_tx_eob(&cm->seg, xd->mi_8x8[0]->mbmi.segment_id,
|
||||||
tx_size);
|
tx_size);
|
||||||
@@ -223,7 +225,7 @@ int vp9_decode_block_tokens(VP9_COMMON *cm, MACROBLOCKD *xd,
|
|||||||
|
|
||||||
eob = decode_coefs(cm, xd, r, block,
|
eob = decode_coefs(cm, xd, r, block,
|
||||||
pd->plane_type, seg_eob, BLOCK_OFFSET(pd->qcoeff, block),
|
pd->plane_type, seg_eob, BLOCK_OFFSET(pd->qcoeff, block),
|
||||||
tx_size, pd->dequant, pt);
|
tx_size, pd->dequant, pt, token_cache);
|
||||||
|
|
||||||
set_contexts(xd, pd, plane_bsize, tx_size, eob > 0, aoff, loff);
|
set_contexts(xd, pd, plane_bsize, tx_size, eob > 0, aoff, loff);
|
||||||
|
|
||||||
|
|||||||
@@ -17,6 +17,7 @@
|
|||||||
|
|
||||||
int vp9_decode_block_tokens(VP9_COMMON *cm, MACROBLOCKD *xd,
|
int vp9_decode_block_tokens(VP9_COMMON *cm, MACROBLOCKD *xd,
|
||||||
int plane, int block, BLOCK_SIZE plane_bsize,
|
int plane, int block, BLOCK_SIZE plane_bsize,
|
||||||
TX_SIZE tx_size, vp9_reader *r);
|
TX_SIZE tx_size, vp9_reader *r,
|
||||||
|
uint8_t *token_cache);
|
||||||
|
|
||||||
#endif // VP9_DECODER_VP9_DETOKENIZE_H_
|
#endif // VP9_DECODER_VP9_DETOKENIZE_H_
|
||||||
|
|||||||
@@ -49,6 +49,8 @@ typedef struct VP9Decompressor {
|
|||||||
|
|
||||||
ENTROPY_CONTEXT *above_context[MAX_MB_PLANE];
|
ENTROPY_CONTEXT *above_context[MAX_MB_PLANE];
|
||||||
PARTITION_CONTEXT *above_seg_context;
|
PARTITION_CONTEXT *above_seg_context;
|
||||||
|
|
||||||
|
DECLARE_ALIGNED(16, unsigned char, token_cache[1024]);
|
||||||
} VP9D_COMP;
|
} VP9D_COMP;
|
||||||
|
|
||||||
#endif // VP9_DECODER_VP9_ONYXD_INT_H_
|
#endif // VP9_DECODER_VP9_ONYXD_INT_H_
|
||||||
|
|||||||
Reference in New Issue
Block a user