diff --git a/vp10/common/ans.h b/vp10/common/ans.h
new file mode 100644
index 000000000..50c2b7344
--- /dev/null
+++ b/vp10/common/ans.h
@@ -0,0 +1,101 @@
+#ifndef VP10_COMMON_ANS_H_
+#define VP10_COMMON_ANS_H_
+// An implementation of Asymmetric Numeral Systems
+// http://arxiv.org/abs/1311.2540v2
+
+#include <stdint.h>
+#include "vpx_ports/mem_ops.h"
+
+// Set to 1 to compute quotient/remainder with FASTDIV from "divide.h"
+// instead of the native '/' and '%' operators.
+#define ANS_DIVIDE_BY_MULTIPLY 0
+#if ANS_DIVIDE_BY_MULTIPLY
+#include "divide.h"
+#define ANS_INIT_DIVIDE init_fastdiv()
+#define ANS_DIVREM(quotient, remainder, dividend, divisor) \
+  do {                                                     \
+    (quotient) = FASTDIV((dividend), (divisor));           \
+    (remainder) = (dividend) - (quotient) * (divisor);     \
+  } while (0)
+#else
+#define ANS_INIT_DIVIDE
+#define ANS_DIVREM(quotient, remainder, dividend, divisor) \
+  do {                                                     \
+    (quotient) = (dividend) / (divisor);                   \
+    (remainder) = (dividend) % (divisor);                  \
+  } while (0)
+#endif
+
+#ifdef __cplusplus
+extern "C" {
+#endif  // __cplusplus
+
+// Coder state shared by the write and read routines below.
+struct AnsCoder {
+  uint8_t *buf;    // byte buffer: rabs_write appends, rabs_read pops the tail
+  int buf_offset;  // next index to write; one past the next byte to read
+  uint32_t state;  // current ANS state
+};
+
+typedef uint8_t AnsP8;
+#define ans_p8_precision 256
+#define ans_p8_shift 8
+#define l_base (ans_p8_precision * 4)  // l_base % precision must be 0
+#define io_base 256
+// Range I = { l_base, l_base + 1, ..., l_base * io_base - 1 }
+
+// Points ans at buf, with no bytes written yet and the state set to l_base.
+static inline void ans_write_init(struct AnsCoder *const ans,
+                                  uint8_t *const buf) {
+  ans->buf = buf;
+  ans->buf_offset = 0;
+  ans->state = l_base;
+}
+
+// Stores the final state with mem_put_be24 right after the bytes emitted so
+// far and returns buf_offset + 3. buf_offset itself is left unchanged.
+static inline int ans_write_end(struct AnsCoder *const ans) {
+  mem_put_be24(ans->buf + ans->buf_offset, ans->state);
+  return ans->buf_offset + 3;
+}
+
+// rABS with normalization
+// p or p0 takes the place of l_s from the paper
+// ans_p8_precision is m
+// p0 weights val == 0 and (ans_p8_precision - p0) weights val != 0. p0 must
+// be nonzero: with p0 == 0 both weights are 0 and ANS_DIVREM divides by zero.
+static inline void rabs_write(struct AnsCoder *ans, int val, AnsP8 p0) {
+  const AnsP8 p = ans_p8_precision - p0;
+  const unsigned l_s = val ? p : p0;
+  unsigned quot, rem;
+  // Emit the low-order byte first when the state is too large for this l_s.
+  if (ans->state >= l_base / ans_p8_precision * io_base * l_s) {
+    ans->buf[ans->buf_offset++] = ans->state % io_base;
+    ans->state /= io_base;
+  }
+  ANS_DIVREM(quot, rem, ans->state, l_s);
+  ans->state = quot * ans_p8_precision + rem + (val ? 0 : p);
+}
+
+// Inverse of rabs_write for the same p0: returns the decoded bit and pulls
+// bytes back from the tail of the buffer as the state needs refilling.
+static inline int rabs_read(struct AnsCoder *ans, AnsP8 p0) {
+  int val;
+  unsigned l_s;
+  const AnsP8 p = ans_p8_precision - p0;
+  // Refill only while bytes remain: a truncated or corrupt stream must not
+  // drive buf_offset negative and read before the start of buf.
+  if (ans->state < l_base && ans->buf_offset > 0) {
+    ans->state = ans->state * io_base + ans->buf[--ans->buf_offset];
+  }
+  val = ans->state % ans_p8_precision < p;
+  l_s = val ? p : p0;
+  ans->state = (ans->state / ans_p8_precision) * l_s +
+               ans->state % ans_p8_precision - (!val * p);
+  return val;
+}
+#undef ANS_DIVREM
+#ifdef __cplusplus
+}  // extern "C"
+#endif  // __cplusplus
+#endif  // VP10_COMMON_ANS_H_
diff --git a/vp10/encoder/bitstream.c b/vp10/encoder/bitstream.c
index 72f649263..9e0b1ff06 100644
--- a/vp10/encoder/bitstream.c
+++ b/vp10/encoder/bitstream.c
@@ -195,10 +195,10 @@ static void pack_mb_tokens(vpx_writer *w,
 
 // This function serializes the tokens backwards both in token order and
 // bit order in each token.
-static void pack_mb_tokens_r(vpx_writer *w, - const TOKENEXTRA *const start, - const TOKENEXTRA *const stop, - vpx_bit_depth_t bit_depth) { +static void pack_mb_tokens_ans(struct AnsCoder *const ans, + const TOKENEXTRA *const start, + const TOKENEXTRA *const stop, + vpx_bit_depth_t bit_depth) { const TOKENEXTRA *p; for (p = stop; p >= start; --p) { @@ -228,8 +228,8 @@ static void pack_mb_tokens_r(vpx_writer *w, if (b->base_val) { const int e = p->extra, l = b->len; if (l) { - vpx_write_bit(w, e & 1); - vp10_write_tree_r(w, b->tree, b->prob, e >> 1, l, 0); + rabs_write(ans, e & 1, 128); + vp10_write_tree_r(ans, b->tree, b->prob, e >> 1, l, 0); } } @@ -246,12 +246,12 @@ static void pack_mb_tokens_r(vpx_writer *w, if (t >= TWO_TOKEN && t < EOB_TOKEN) { int len = UNCONSTRAINED_NODES - p->skip_eob_node; int bits = v >> (n - len); - vp10_write_tree_r(w, vp10_coef_con_tree, + vp10_write_tree_r(ans, vp10_coef_con_tree, vp10_pareto8_full[p->context_tree[PIVOT_NODE] - 1], v, n - len, 0); - vp10_write_tree_r(w, vp10_coef_tree, p->context_tree, bits, len, i); + vp10_write_tree_r(ans, vp10_coef_tree, p->context_tree, bits, len, i); } else { - vp10_write_tree_r(w, vp10_coef_tree, p->context_tree, v, n, i); + vp10_write_tree_r(ans, vp10_coef_tree, p->context_tree, v, n, i); } } } @@ -990,7 +990,7 @@ static int get_refresh_mask(VP10_COMP *cpi) { static size_t encode_tiles(VP10_COMP *cpi, uint8_t *data_ptr) { VP10_COMMON *const cm = &cpi->common; vpx_writer mode_bc; - vpx_writer token_bc; + struct AnsCoder token_ans; int tile_row, tile_col; TOKENEXTRA *tok_end; size_t total_size = 0; @@ -1004,7 +1004,9 @@ static size_t encode_tiles(VP10_COMP *cpi, uint8_t *data_ptr) { for (tile_col = 0; tile_col < tile_cols; tile_col++) { int tile_idx = tile_row * tile_cols + tile_col; int put_tile_size = tile_col < tile_cols - 1 || tile_row < tile_rows - 1; - uint8_t *const mode_data_start = data_ptr + total_size + (put_tile_size ? 
8 : 4); + uint8_t *const mode_data_start = + data_ptr + total_size + (put_tile_size ? 8 : 4); + int token_section_size; TOKENEXTRA *tok = cpi->tile_tok[tile_row][tile_col]; tok_end = cpi->tile_tok[tile_row][tile_col] + @@ -1014,21 +1016,19 @@ static size_t encode_tiles(VP10_COMP *cpi, uint8_t *data_ptr) { write_modes(cpi, &cpi->tile_data[tile_idx].tile_info, &mode_bc); vpx_stop_encode(&mode_bc); - vpx_start_encode(&token_bc, mode_data_start + mode_bc.pos); - while (tok < tok_end) { - pack_mb_tokens(&token_bc, &tok, tok_end, cm->bit_depth); - } - assert(tok == tok_end); - vpx_stop_encode(&token_bc); + ans_write_init(&token_ans, mode_data_start + mode_bc.pos); + pack_mb_tokens_ans(&token_ans, tok, tok_end, cm->bit_depth); + token_section_size = ans_write_end(&token_ans); if (put_tile_size) { // size of this tile - mem_put_be32(data_ptr + total_size, 4 + mode_bc.pos + token_bc.pos); + mem_put_be32(data_ptr + total_size, + 4 + mode_bc.pos + token_section_size); total_size += 4; } // put where the token section begins mem_put_be32(data_ptr + total_size, mode_bc.pos); - total_size += 4 + mode_bc.pos + token_bc.pos; + total_size += 4 + mode_bc.pos + token_section_size; } } diff --git a/vp10/encoder/treewriter.h b/vp10/encoder/treewriter.h index 5fb5c96ab..10f8e9d7b 100644 --- a/vp10/encoder/treewriter.h +++ b/vp10/encoder/treewriter.h @@ -12,6 +12,7 @@ #define VP10_ENCODER_TREEWRITER_H_ #include "vpx_dsp/bitwriter.h" +#include "vp10/common/ans.h" #ifdef __cplusplus extern "C" { @@ -30,8 +31,10 @@ void vp10_tokens_from_tree(struct vp10_token*, const vpx_tree_index *); // TODO: CHECK MAX REVERSIBLE TREE SIZE, security concerns #define VP10_TOKEN_SCRATCH_LEN 32 -static INLINE void vp10_write_tree_r(vpx_writer *w, const vpx_tree_index *tree, - const vpx_prob *probs, int bits, int len, +static INLINE void vp10_write_tree_r(struct AnsCoder *const ans, + const vpx_tree_index *const tree, + const vpx_prob *const probs, + int bits, int len, vpx_tree_index tidx) { int i; struct { 
uint8_t bit; vpx_prob prob; } scratch[VP10_TOKEN_SCRATCH_LEN]; @@ -45,7 +48,7 @@ static INLINE void vp10_write_tree_r(vpx_writer *w, const vpx_tree_index *tree, tidx = tree[tidx + bit]; } for (i = len; i >= 0; --i) { - vpx_write(w, scratch[i].bit, scratch[i].prob); + rabs_write(ans, scratch[i].bit, scratch[i].prob); } } #undef VP10_TOKEN_SCRATCH_LEN