From 911f6b036e0e1c836edb92f9c01f3021aabe3115 Mon Sep 17 00:00:00 2001 From: Alex Converse Date: Fri, 11 Sep 2015 14:17:59 -0700 Subject: [PATCH] tmp Change-Id: I6121c07f4af8065c1f4c4488e50990f3d71cc4c1 --- vp10/common/ans.h | 23 ++++++++++++- vp10/decoder/decodeframe.c | 66 +++++++++++++++++++++++++------------ vp10/decoder/decoder.h | 5 +-- vp10/decoder/detokenize.c | 67 +++++++++++++++++++++++--------------- vp10/decoder/detokenize.h | 8 +++-- 5 files changed, 116 insertions(+), 53 deletions(-) diff --git a/vp10/common/ans.h b/vp10/common/ans.h index 50c2b7344..6e45a659f 100644 --- a/vp10/common/ans.h +++ b/vp10/common/ans.h @@ -34,6 +34,12 @@ struct AnsCoder { uint32_t state; }; +struct AnsDecoder { + const uint8_t *buf; + int buf_offset; + uint32_t state; +}; + typedef uint8_t AnsP8; #define ans_p8_precision 256 #define ans_p8_shift 8 @@ -69,7 +75,7 @@ static inline void rabs_write(struct AnsCoder *ans, int val, AnsP8 p0) { ans->state = quot * ans_p8_precision + rem + (val ? 0 : p); } -static inline int rabs_read(struct AnsCoder *ans, AnsP8 p0) { +static inline int rabs_read(struct AnsDecoder *ans, AnsP8 p0) { int val; unsigned l_s; const AnsP8 p = ans_p8_precision - p0; @@ -82,6 +88,21 @@ static inline int rabs_read(struct AnsCoder *ans, AnsP8 p0) { ans->state % ans_p8_precision - (!val * p); return val; } + +static inline int ans_read_init(struct AnsDecoder *const ans, + const uint8_t *const buf, + int offset) { + if (offset < 3) + return 1; + ans->buf = buf; + ans->buf_offset = offset - 3; + ans->state = mem_get_be24(buf + offset - 3); + return 0; +} + +static inline int ans_read_end(struct AnsDecoder *const ans) { + return ans->state == l_base; +} #undef ANS_DIVREM #ifdef __cplusplus } // extern "C" diff --git a/vp10/decoder/decodeframe.c b/vp10/decoder/decodeframe.c index 2aa66a43c..638bca04c 100644 --- a/vp10/decoder/decodeframe.c +++ b/vp10/decoder/decodeframe.c @@ -327,7 +327,7 @@ static void inverse_transform_block_intra(MACROBLOCKD* xd, int plane, } 
static void predict_and_reconstruct_intra_block(MACROBLOCKD *const xd, - vpx_reader *r, + struct AnsDecoder *const r, MB_MODE_INFO *const mbmi, int plane, int row, int col, @@ -357,7 +357,8 @@ static void predict_and_reconstruct_intra_block(MACROBLOCKD *const xd, } } -static int reconstruct_inter_block(MACROBLOCKD *const xd, vpx_reader *r, +static int reconstruct_inter_block(MACROBLOCKD *const xd, + struct AnsDecoder *const r, MB_MODE_INFO *const mbmi, int plane, int row, int col, TX_SIZE tx_size) { struct macroblockd_plane *const pd = &xd->plane[plane]; @@ -780,7 +781,8 @@ static MB_MODE_INFO *set_offsets(VP10_COMMON *const cm, MACROBLOCKD *const xd, static void decode_block(VP10Decoder *const pbi, MACROBLOCKD *const xd, int mi_row, int mi_col, - vpx_reader *r, vpx_reader *tok, BLOCK_SIZE bsize, + vpx_reader *r, struct AnsDecoder *const tok, + BLOCK_SIZE bsize, int bwl, int bhl) { VP10_COMMON *const cm = &pbi->common; const int less8x8 = bsize < BLOCK_8X8; @@ -916,7 +918,7 @@ static PARTITION_TYPE read_partition(MACROBLOCKD *xd, int mi_row, int mi_col, // TODO(slavarnway): eliminate bsize and subsize in future commits static void decode_partition(VP10Decoder *const pbi, MACROBLOCKD *const xd, int mi_row, int mi_col, - vpx_reader* r, vpx_reader *tok, + vpx_reader* r, struct AnsDecoder *const tok, BLOCK_SIZE bsize, int n4x4_l2) { VP10_COMMON *const cm = &pbi->common; const int n8x8_l2 = n4x4_l2 - 1; @@ -973,13 +975,13 @@ static void decode_partition(VP10Decoder *const pbi, MACROBLOCKD *const xd, dec_update_partition_context(xd, mi_row, mi_col, subsize, num_8x8_wh); } -static void setup_token_decoder(const uint8_t *data, - const uint8_t *data_end, - size_t read_size, - struct vpx_internal_error_info *error_info, - vpx_reader *r, - vpx_decrypt_cb decrypt_cb, - void *decrypt_state) { +static void setup_bool_decoder(const uint8_t *data, + const uint8_t *data_end, + const size_t read_size, + struct vpx_internal_error_info *error_info, + vpx_reader *r, + vpx_decrypt_cb 
decrypt_cb, + void *decrypt_state) { // Validate the calculated partition length. If the buffer // described by the partition can't be fully read, then restrict // it to the portion that can be (for EC mode) or throw an error. @@ -992,6 +994,27 @@ static void setup_token_decoder(const uint8_t *data, "Failed to allocate bool decoder %d", 1); } +static void setup_token_decoder(const uint8_t *data, + const uint8_t *data_end, + const size_t read_size, + struct vpx_internal_error_info *error_info, + struct AnsDecoder *const ans, + vpx_decrypt_cb decrypt_cb, + void *decrypt_state) { + (void) decrypt_cb; + (void) decrypt_state; + // Validate the calculated partition length. If the buffer + // described by the partition can't be fully read, then restrict + // it to the portion that can be (for EC mode) or throw an error. + if (!read_is_valid(data, read_size, data_end)) + vpx_internal_error(error_info, VPX_CODEC_CORRUPT_FRAME, + "Truncated packet or corrupt tile length"); + + if (ans_read_init(ans, data, (int)read_size)) // < 3 bytes: no ANS state + vpx_internal_error(error_info, VPX_CODEC_CORRUPT_FRAME, + "Token partition too short for ANS state"); +} + static void read_coef_probs_common(vp10_coeff_probs_model *coef_probs, vpx_reader *r) { int i, j, k, l, m; @@ -1467,12 +1490,12 @@ static const uint8_t *decode_tiles(VP10Decoder *pbi, if (token_offset > buf->size - 4) vpx_internal_error(&cm->error, VPX_CODEC_CORRUPT_FRAME, "Truncated packet or corrupt tile length"); - setup_token_decoder(buf->data + 4, data_end, token_offset, &cm->error, - &tile_data->bit_reader, pbi->decrypt_cb, - pbi->decrypt_state); + setup_bool_decoder(buf->data + 4, data_end, token_offset, &cm->error, + &tile_data->bit_reader, pbi->decrypt_cb, + pbi->decrypt_state); setup_token_decoder(buf->data + (4 + token_offset), data_end, buf->size - (4 + token_offset), &cm->error, - &tile_data->token_reader, pbi->decrypt_cb, + &tile_data->token_ans, pbi->decrypt_cb, pbi->decrypt_state); vp10_init_macroblockd(cm, &tile_data->xd, 
tile_data->dqcoeff); } @@ -1494,7 +1517,7 @@ static const uint8_t *decode_tiles(VP10Decoder *pbi, mi_col += MI_BLOCK_SIZE) { decode_partition(pbi, &tile_data->xd, mi_row, mi_col, &tile_data->bit_reader, - &tile_data->token_reader, BLOCK_64X64, 4); + &tile_data->token_ans, BLOCK_64X64, 4); } pbi->mb.corrupted |= tile_data->xd.corrupted; if (pbi->mb.corrupted) @@ -1544,7 +1567,8 @@ static const uint8_t *decode_tiles(VP10Decoder *pbi, if (pbi->frame_parallel_decode) vp10_frameworker_broadcast(pbi->cur_buf, INT_MAX); - return vpx_reader_find_end(&tile_data->token_reader); + //return vpx_reader_find_end(&tile_data->token_ans); + return data_end; } static int tile_worker_hook(TileWorkerData *const tile_data, @@ -1568,7 +1592,7 @@ static int tile_worker_hook(TileWorkerData *const tile_data, mi_col += MI_BLOCK_SIZE) { decode_partition(tile_data->pbi, &tile_data->xd, mi_row, mi_col, &tile_data->bit_reader, - &tile_data->token_reader, + &tile_data->token_ans, BLOCK_64X64, 4); } } @@ -1695,9 +1719,9 @@ static const uint8_t *decode_tiles_mt(VP10Decoder *pbi, vp10_zero(tile_data->dqcoeff); vp10_tile_init(tile, cm, 0, buf->col); vp10_tile_init(&tile_data->xd.tile, cm, 0, buf->col); - setup_token_decoder(buf->data, data_end, buf->size, &cm->error, - &tile_data->bit_reader, pbi->decrypt_cb, - pbi->decrypt_state); + setup_bool_decoder(buf->data, data_end, buf->size, &cm->error, + &tile_data->bit_reader, pbi->decrypt_cb, + pbi->decrypt_state); vp10_init_macroblockd(cm, &tile_data->xd, tile_data->dqcoeff); worker->had_error = 0; diff --git a/vp10/decoder/decoder.h b/vp10/decoder/decoder.h index 53262c45b..4748cac40 100644 --- a/vp10/decoder/decoder.h +++ b/vp10/decoder/decoder.h @@ -18,6 +18,7 @@ #include "vpx_scale/yv12config.h" #include "vpx_util/vpx_thread.h" +#include "vp10/common/ans.h" #include "vp10/common/thread_common.h" #include "vp10/common/onyxc_int.h" #include "vp10/common/ppflags.h" @@ -31,7 +32,7 @@ extern "C" { typedef struct TileData { VP10_COMMON *cm; vpx_reader 
bit_reader; - vpx_reader token_reader; + struct AnsDecoder token_ans; DECLARE_ALIGNED(16, MACROBLOCKD, xd); /* dqcoeff are shared by all the planes. So planes must be decoded serially */ DECLARE_ALIGNED(16, tran_low_t, dqcoeff[32 * 32]); @@ -40,7 +41,7 @@ typedef struct TileData { typedef struct TileWorkerData { struct VP10Decoder *pbi; vpx_reader bit_reader; - vpx_reader token_reader; + struct AnsDecoder token_ans; FRAME_COUNTS counts; DECLARE_ALIGNED(16, MACROBLOCKD, xd); /* dqcoeff are shared by all the planes. So planes must be decoded serially */ diff --git a/vp10/decoder/detokenize.c b/vp10/decoder/detokenize.c index 2902ece7c..ee797b40f 100644 --- a/vp10/decoder/detokenize.c +++ b/vp10/decoder/detokenize.c @@ -11,6 +11,7 @@ #include "vpx_mem/vpx_mem.h" #include "vpx_ports/mem.h" +#include "vp10/common/ans.h" #include "vp10/common/blockd.h" #include "vp10/common/common.h" #include "vp10/common/entropy.h" @@ -38,18 +39,31 @@ ++coef_counts[band][ctx][token]; \ } while (0) -static INLINE int read_coeff(const vpx_prob *probs, int n, vpx_reader *r) { + +static INLINE int rabs_read_tree(struct AnsDecoder *const ans, + const vpx_tree_index *const tree, + const vpx_prob *const probs) { + vpx_tree_index i = 0; + + while ((i = tree[i + rabs_read(ans, probs[i >> 1])]) > 0) + continue; + + return -i; +} + +static INLINE int read_coeff(const vpx_prob *const probs, int n, + struct AnsDecoder *const ans) { int i, val = 0; for (i = 0; i < n; ++i) - val = (val << 1) | vpx_read(r, probs[i]); + val = (val << 1) | rabs_read(ans, probs[i]); return val; } -static int decode_coefs(const MACROBLOCKD *xd, +static int decode_coefs(const MACROBLOCKD *const xd, PLANE_TYPE type, tran_low_t *dqcoeff, TX_SIZE tx_size, const int16_t *dq, int ctx, const int16_t *scan, const int16_t *nb, - vpx_reader *r) { + struct AnsDecoder *const ans) { FRAME_COUNTS *counts = xd->counts; const int max_eob = 16 << (tx_size << 1); const FRAME_CONTEXT *const fc = xd->fc; @@ -117,12 +131,12 @@ static int 
decode_coefs(const MACROBLOCKD *xd, prob = coef_probs[band][ctx]; if (counts) ++eob_branch_count[band][ctx]; - if (!vpx_read(r, prob[EOB_CONTEXT_NODE])) { + if (!rabs_read(ans, prob[EOB_CONTEXT_NODE])) { INCREMENT_COUNT(EOB_MODEL_TOKEN); break; } - while (!vpx_read(r, prob[ZERO_CONTEXT_NODE])) { + while (!rabs_read(ans, prob[ZERO_CONTEXT_NODE])) { INCREMENT_COUNT(ZERO_TOKEN); dqv = dq[1]; token_cache[scan[c]] = 0; @@ -134,14 +148,14 @@ static int decode_coefs(const MACROBLOCKD *xd, prob = coef_probs[band][ctx]; } - if (!vpx_read(r, prob[ONE_CONTEXT_NODE])) { + if (!rabs_read(ans, prob[ONE_CONTEXT_NODE])) { INCREMENT_COUNT(ONE_TOKEN); token = ONE_TOKEN; val = 1; } else { INCREMENT_COUNT(TWO_TOKEN); - token = vpx_read_tree(r, vp10_coef_con_tree, - vp10_pareto8_full[prob[PIVOT_NODE] - 1]); + token = rabs_read_tree(ans, vp10_coef_con_tree, + vp10_pareto8_full[prob[PIVOT_NODE] - 1]); switch (token) { case TWO_TOKEN: case THREE_TOKEN: @@ -149,38 +163,38 @@ static int decode_coefs(const MACROBLOCKD *xd, val = token; break; case CATEGORY1_TOKEN: - val = CAT1_MIN_VAL + read_coeff(cat1_prob, 1, r); + val = CAT1_MIN_VAL + read_coeff(cat1_prob, 1, ans); break; case CATEGORY2_TOKEN: - val = CAT2_MIN_VAL + read_coeff(cat2_prob, 2, r); + val = CAT2_MIN_VAL + read_coeff(cat2_prob, 2, ans); break; case CATEGORY3_TOKEN: - val = CAT3_MIN_VAL + read_coeff(cat3_prob, 3, r); + val = CAT3_MIN_VAL + read_coeff(cat3_prob, 3, ans); break; case CATEGORY4_TOKEN: - val = CAT4_MIN_VAL + read_coeff(cat4_prob, 4, r); + val = CAT4_MIN_VAL + read_coeff(cat4_prob, 4, ans); break; case CATEGORY5_TOKEN: - val = CAT5_MIN_VAL + read_coeff(cat5_prob, 5, r); + val = CAT5_MIN_VAL + read_coeff(cat5_prob, 5, ans); break; case CATEGORY6_TOKEN: #if CONFIG_VP9_HIGHBITDEPTH switch (xd->bd) { case VPX_BITS_8: - val = CAT6_MIN_VAL + read_coeff(cat6_prob, 14, r); + val = CAT6_MIN_VAL + read_coeff(cat6_prob, 14, ans); break; case VPX_BITS_10: - val = CAT6_MIN_VAL + read_coeff(cat6_prob, 16, r); + val = CAT6_MIN_VAL 
+ read_coeff(cat6_prob, 16, ans); break; case VPX_BITS_12: - val = CAT6_MIN_VAL + read_coeff(cat6_prob, 18, r); + val = CAT6_MIN_VAL + read_coeff(cat6_prob, 18, ans); break; default: assert(0); return -1; } #else - val = CAT6_MIN_VAL + read_coeff(cat6_prob, 14, r); + val = CAT6_MIN_VAL + read_coeff(cat6_prob, 14, ans); #endif break; } @@ -188,13 +202,13 @@ static int decode_coefs(const MACROBLOCKD *xd, v = (val * dqv) >> dq_shift; #if CONFIG_COEFFICIENT_RANGE_CHECKING #if CONFIG_VP9_HIGHBITDEPTH - dqcoeff[scan[c]] = highbd_check_range((vpx_read_bit(r) ? -v : v), + dqcoeff[scan[c]] = highbd_check_range((rabs_read(ans, 128) ? -v : v), xd->bd); #else - dqcoeff[scan[c]] = check_range(vpx_read_bit(r) ? -v : v); + dqcoeff[scan[c]] = check_range(rabs_read(ans, 128) ? -v : v); #endif // CONFIG_VP9_HIGHBITDEPTH #else - dqcoeff[scan[c]] = vpx_read_bit(r) ? -v : v; + dqcoeff[scan[c]] = rabs_read(ans, 128) ? -v : v; #endif // CONFIG_COEFFICIENT_RANGE_CHECKING token_cache[scan[c]] = vp10_pt_energy_class[token]; ++c; @@ -250,11 +264,12 @@ void dec_set_contexts(const MACROBLOCKD *xd, struct macroblockd_plane *pd, } } -int vp10_decode_block_tokens(MACROBLOCKD *xd, - int plane, const scan_order *sc, - int x, int y, - TX_SIZE tx_size, vpx_reader *r, - int seg_id) { +int vp10_decode_block_tokens(MACROBLOCKD *const xd, + int plane, const scan_order *sc, + int x, int y, + TX_SIZE tx_size, + struct AnsDecoder *const r, + int seg_id) { struct macroblockd_plane *const pd = &xd->plane[plane]; const int16_t *const dequant = pd->seg_dequant[seg_id]; const int ctx = get_entropy_context(tx_size, pd->above_context + x, diff --git a/vp10/decoder/detokenize.h b/vp10/decoder/detokenize.h index c3fd90a72..be7e38e55 100644 --- a/vp10/decoder/detokenize.h +++ b/vp10/decoder/detokenize.h @@ -12,7 +12,6 @@ #ifndef VP10_DECODER_DETOKENIZE_H_ #define VP10_DECODER_DETOKENIZE_H_ -#include "vpx_dsp/bitreader.h" #include "vp10/decoder/decoder.h" #include "vp10/common/scan.h" @@ -20,10 +19,13 @@ extern "C" { 
#endif -int vp10_decode_block_tokens(MACROBLOCKD *xd, +struct AnsDecoder; + +int vp10_decode_block_tokens(MACROBLOCKD *const xd, int plane, const scan_order *sc, int x, int y, - TX_SIZE tx_size, vpx_reader *r, + TX_SIZE tx_size, + struct AnsDecoder *const r, int seg_id); #ifdef __cplusplus