Merge "Change bitreader to use a larger window."

This commit is contained in:
John Koleszar 2010-06-17 18:08:36 -07:00 committed by Code Review
commit c65e8e8e46
11 changed files with 114 additions and 206 deletions

View File

@ -70,15 +70,11 @@ DEFINE(detok_current_bc, offsetof(DETOK, current_bc));
DEFINE(detok_coef_probs, offsetof(DETOK, coef_probs));
DEFINE(detok_eob, offsetof(DETOK, eob));
DEFINE(bool_decoder_lowvalue, offsetof(BOOL_DECODER, lowvalue));
DEFINE(bool_decoder_range, offsetof(BOOL_DECODER, range));
DEFINE(bool_decoder_user_buffer_end, offsetof(BOOL_DECODER, user_buffer_end));
DEFINE(bool_decoder_user_buffer, offsetof(BOOL_DECODER, user_buffer));
DEFINE(bool_decoder_value, offsetof(BOOL_DECODER, value));
DEFINE(bool_decoder_count, offsetof(BOOL_DECODER, count));
DEFINE(bool_decoder_user_buffer, offsetof(BOOL_DECODER, user_buffer));
DEFINE(bool_decoder_user_buffer_sz, offsetof(BOOL_DECODER, user_buffer_sz));
DEFINE(bool_decoder_decode_buffer, offsetof(BOOL_DECODER, decode_buffer));
DEFINE(bool_decoder_read_ptr, offsetof(BOOL_DECODER, read_ptr));
DEFINE(bool_decoder_write_ptr, offsetof(BOOL_DECODER, write_ptr));
DEFINE(bool_decoder_range, offsetof(BOOL_DECODER, range));
DEFINE(tokenextrabits_min_val, offsetof(TOKENEXTRABITS, min_val));
DEFINE(tokenextrabits_length, offsetof(TOKENEXTRABITS, Length));

View File

@ -16,9 +16,6 @@
#undef vp8_dbool_start
#define vp8_dbool_start vp8dx_start_decode_v6
#undef vp8_dbool_stop
#define vp8_dbool_stop vp8dx_stop_decode_v6
#undef vp8_dbool_fill
#define vp8_dbool_fill vp8_bool_decoder_fill_v6
@ -33,9 +30,6 @@
#undef vp8_dbool_start
#define vp8_dbool_start vp8dx_start_decode_neon
#undef vp8_dbool_stop
#define vp8_dbool_stop vp8dx_stop_decode_neon
#undef vp8_dbool_fill
#define vp8_dbool_fill vp8_bool_decoder_fill_neon

View File

@ -26,7 +26,6 @@ void vp8_dmachine_specific_config(VP8D_COMP *pbi)
pbi->dequant.idct = vp8_dequant_idct_neon;
pbi->dequant.idct_dc = vp8_dequant_dc_idct_neon;
pbi->dboolhuff.start = vp8dx_start_decode_c;
pbi->dboolhuff.stop = vp8dx_stop_decode_c;
pbi->dboolhuff.fill = vp8dx_bool_decoder_fill_c;
pbi->dboolhuff.debool = vp8dx_decode_bool_c;
pbi->dboolhuff.devalue = vp8dx_decode_value_c;
@ -36,7 +35,6 @@ void vp8_dmachine_specific_config(VP8D_COMP *pbi)
pbi->dequant.idct = vp8_dequant_idct_v6;
pbi->dequant.idct_dc = vp8_dequant_dc_idct_v6;
pbi->dboolhuff.start = vp8dx_start_decode_c;
pbi->dboolhuff.stop = vp8dx_stop_decode_c;
pbi->dboolhuff.fill = vp8dx_bool_decoder_fill_c;
pbi->dboolhuff.debool = vp8dx_decode_bool_c;
pbi->dboolhuff.devalue = vp8dx_decode_value_c;

View File

@ -26,86 +26,41 @@ DECLARE_ALIGNED(16, const unsigned int, vp8dx_bitreader_norm[256]) =
};
/* Copy at most to_write bytes from the user buffer to write_ptr. The request
 * is clamped to the bytes still available in the user buffer; afterwards the
 * user buffer pointer/size and the write pointer are moved past the copied
 * bytes (the write pointer via br_ptr_advance). */
static void copy_in(BOOL_DECODER *br, unsigned int to_write)
{
    const unsigned int avail = br->user_buffer_sz;
    const unsigned int n = (to_write > avail) ? avail : to_write;

    memcpy(br->write_ptr, br->user_buffer, n);

    br->user_buffer    += n;
    br->user_buffer_sz  = avail - n;
    br->write_ptr       = br_ptr_advance(br->write_ptr, n);
}
/* Prepare a BOOL_DECODER to read from source (source_sz bytes).
 *
 * Returns 1 when source is NULL while source_sz is non-zero, or when the
 * decode buffer cannot be allocated; returns 0 otherwise.
 *
 * NOTE(review): this text comes from a diff view whose +/- markers are gone,
 * so statements from the old and the new version appear to be interleaved
 * (count and user_buffer are each stored twice below). Confirm against the
 * repository before relying on the statement order shown here.
 */
int vp8dx_start_decode_c(BOOL_DECODER *br, const unsigned char *source,
unsigned int source_sz)
{
br->lowvalue = 0;
/* One past the last byte of the caller's data. */
br->user_buffer_end = source+source_sz;
br->user_buffer = source;
br->value = 0;
/* NOTE(review): count is set to -8 here and to 0 three lines below; as
 * written the later store wins. */
br->count = -8;
br->range = 255;
br->count = 0;
br->user_buffer = source;
br->user_buffer_sz = source_sz;
/* A non-empty input with no data pointer is rejected. */
if (source_sz && !source)
return 1;
/* Allocate the ring buffer backing store with alignment equal to the
* buffer size*2 so that a single pointer can be used for wrapping rather
* than a pointer+offset.
*/
br->decode_buffer = vpx_memalign(VP8_BOOL_DECODER_SZ * 2,
VP8_BOOL_DECODER_SZ);
if (!br->decode_buffer)
return 1;
/* Populate the buffer */
br->read_ptr = br->decode_buffer;
br->write_ptr = br->decode_buffer;
copy_in(br, VP8_BOOL_DECODER_SZ);
vp8dx_bool_decoder_fill_c(br);
/* Read the first byte */
br->value = (*br->read_ptr++) << 8;
return 0;
}
/* Refill routine for the bool decoder.
 *
 * NOTE(review): this text comes from a diff view whose +/- markers are gone.
 * The body appears to mix two implementations: one that loads
 * value/count/user_buffer into locals and runs VP8DX_BOOL_DECODER_FILL, and
 * one that computes free room between read_ptr and write_ptr and calls
 * copy_in. Confirm against the repository which lines are current.
 */
void vp8dx_bool_decoder_fill_c(BOOL_DECODER *br)
{
int left, right;
const unsigned char *bufptr;
const unsigned char *bufend;
VP8_BD_VALUE value;
int count;
/* Work on local copies of the decoder fields. */
bufend = br->user_buffer_end;
bufptr = br->user_buffer;
value = br->value;
count = br->count;
/* Find available room in the buffer */
left = 0;
right = br->read_ptr - br->write_ptr;
/* Pull bytes from bufptr into value, updating count, until value has no
 * room for another whole byte or bufend is reached. */
VP8DX_BOOL_DECODER_FILL(count, value, bufptr, bufend);
if (right < 0)
{
/* Read pointer is behind the write pointer. We can write from the
* write pointer to the end of the buffer.
*/
right = VP8_BOOL_DECODER_SZ - (br->write_ptr - br->decode_buffer);
left = br->read_ptr - br->decode_buffer;
}
/* Do nothing unless at least 128 bytes of room were found. */
if (right + left < 128)
return;
if (right)
copy_in(br, right);
if (left)
{
/* Continue writing from the start of the decode buffer. */
br->write_ptr = br->decode_buffer;
copy_in(br, left);
}
}
/* Release the decode buffer owned by bc and clear the pointer.
 *
 * NOTE(review): the three stores after the free use br, bufptr, value and
 * count, none of which is declared in this function as shown. This text comes
 * from a diff view whose +/- markers are gone, so those lines presumably
 * belong to the end of a different function -- confirm against the
 * repository.
 */
void vp8dx_stop_decode_c(BOOL_DECODER *bc)
{
vpx_free(bc->decode_buffer);
bc->decode_buffer = 0;
br->user_buffer = bufptr;
br->value = value;
br->count = count;
}
#if 0
@ -120,13 +75,18 @@ void vp8dx_stop_decode_c(BOOL_DECODER *bc)
int vp8dx_decode_bool_c(BOOL_DECODER *br, int probability)
{
unsigned int bit=0;
VP8_BD_VALUE value;
unsigned int split;
unsigned int bigsplit;
register unsigned int range = br->range;
register unsigned int value = br->value;
VP8_BD_VALUE bigsplit;
int count;
unsigned int range;
value = br->value;
count = br->count;
range = br->range;
split = 1 + (((range-1) * probability) >> 8);
bigsplit = (split<<8);
bigsplit = (VP8_BD_VALUE)split << (VP8_BD_VALUE_SIZE - 8);
range = split;
if(value >= bigsplit)
@ -144,21 +104,16 @@ int vp8dx_decode_bool_c(BOOL_DECODER *br, int probability)
}*/
{
int count = br->count;
register unsigned int shift = vp8dx_bitreader_norm[range];
range <<= shift;
value <<= shift;
count -= shift;
if(count <= 0)
{
value |= (*br->read_ptr) << (-count);
br->read_ptr = br_ptr_advance(br->read_ptr, 1);
count += 8 ;
}
br->count = count;
}
br->value = value;
br->count = count;
br->range = range;
if (count < 0)
vp8dx_bool_decoder_fill_c(br);
return bit;
}

View File

@ -11,51 +11,31 @@
#ifndef DBOOLHUFF_H
#define DBOOLHUFF_H
#include <stddef.h>
#include <limits.h>
#include "vpx_ports/config.h"
#include "vpx_ports/mem.h"
#include "vpx/vpx_integer.h"
/* Size of the bool decoder backing storage
*
* This size was chosen to be greater than the worst case encoding of a
* single macroblock. This was calculated as follows (python):
*
* def max_cost(prob):
* return max(prob_costs[prob], prob_costs[255-prob]) / 256;
*
* tree_nodes_cost = 7 * max_cost(255)
* extra_bits_cost = sum([max_cost(bit) for bit in extra_bits])
* sign_bit_cost = max_cost(128)
* total_cost = tree_nodes_cost + extra_bits_cost + sign_bit_cost
*
* where the prob_costs table was taken from the C vp8_prob_cost table in
* boolhuff.c and the extra_bits table was taken from the 11 extrabits for
* a category 6 token as defined in vp8d_token_extra_bits2/detokenize.c
*
* This equation produced a maximum of 79 bits per coefficient. Scaling up
* to the macroblock level:
*
* 79 bits/coeff * 16 coeff/block * 25 blocks/macroblock = 31600 b/mb
*
* 4096 bytes = 32768 bits > 31600
*/
#define VP8_BOOL_DECODER_SZ 4096
#define VP8_BOOL_DECODER_MASK (VP8_BOOL_DECODER_SZ-1)
#define VP8_BOOL_DECODER_PTR_MASK (~(uintptr_t)(VP8_BOOL_DECODER_SZ))
typedef size_t VP8_BD_VALUE;
# define VP8_BD_VALUE_SIZE ((int)sizeof(VP8_BD_VALUE)*CHAR_BIT)
/*This is meant to be a large, positive constant that can still be efficiently
loaded as an immediate (on platforms like ARM, for example).
Even relatively modest values like 100 would work fine.*/
# define VP8_LOTS_OF_BITS (0x40000000)
struct vp8_dboolhuff_rtcd_vtable;
typedef struct
{
unsigned int lowvalue;
unsigned int range;
unsigned int value;
int count;
const unsigned char *user_buffer_end;
const unsigned char *user_buffer;
unsigned int user_buffer_sz;
unsigned char *decode_buffer;
const unsigned char *read_ptr;
unsigned char *write_ptr;
VP8_BD_VALUE value;
int count;
unsigned int range;
#if CONFIG_RUNTIME_CPU_DETECT
struct vp8_dboolhuff_rtcd_vtable *rtcd;
#endif
@ -63,7 +43,6 @@ typedef struct
/* Signature templates for the bool decoder entry points.
 *
 * Each macro expands to a declarator for sym and nothing else: there is no
 * trailing semicolon, so the same template can follow `extern`, follow
 * `typedef` (with sym written as (*name)), or be used as a type name. The use
 * site supplies the terminating ';'.
 */
#define prototype_dbool_start(sym) int sym(BOOL_DECODER *br, \
    const unsigned char *source, unsigned int source_sz)
#define prototype_dbool_stop(sym) void sym(BOOL_DECODER *bc)
#define prototype_dbool_fill(sym) void sym(BOOL_DECODER *br)
#define prototype_dbool_debool(sym) int sym(BOOL_DECODER *br, int probability)
/* No ';' here either: it would leave a stray empty declaration after every
 * `extern`/`typedef` use and make the macro unusable inside an expression. */
#define prototype_dbool_devalue(sym) int sym(BOOL_DECODER *br, int bits)
@ -76,10 +55,6 @@ typedef struct
#define vp8_dbool_start vp8dx_start_decode_c
#endif
#ifndef vp8_dbool_stop
#define vp8_dbool_stop vp8dx_stop_decode_c
#endif
#ifndef vp8_dbool_fill
#define vp8_dbool_fill vp8dx_bool_decoder_fill_c
#endif
@ -93,20 +68,17 @@ typedef struct
#endif
extern prototype_dbool_start(vp8_dbool_start);
extern prototype_dbool_stop(vp8_dbool_stop);
extern prototype_dbool_fill(vp8_dbool_fill);
extern prototype_dbool_debool(vp8_dbool_debool);
extern prototype_dbool_devalue(vp8_dbool_devalue);
typedef prototype_dbool_start((*vp8_dbool_start_fn_t));
typedef prototype_dbool_stop((*vp8_dbool_stop_fn_t));
typedef prototype_dbool_fill((*vp8_dbool_fill_fn_t));
typedef prototype_dbool_debool((*vp8_dbool_debool_fn_t));
typedef prototype_dbool_devalue((*vp8_dbool_devalue_fn_t));
typedef struct vp8_dboolhuff_rtcd_vtable {
vp8_dbool_start_fn_t start;
vp8_dbool_stop_fn_t stop;
vp8_dbool_fill_fn_t fill;
vp8_dbool_debool_fn_t debool;
vp8_dbool_devalue_fn_t devalue;
@ -123,17 +95,6 @@ typedef struct vp8_dboolhuff_rtcd_vtable {
#define IF_RTCD(x) NULL
//#endif
/* Step _ptr forward by n bytes and apply VP8_BOOL_DECODER_PTR_MASK to the
 * resulting address. The masked address is returned as a non-const pointer. */
static unsigned char *br_ptr_advance(const unsigned char *_ptr,
                                     unsigned int n)
{
    const uintptr_t advanced = (uintptr_t)_ptr + n;

    return (void *)(advanced & VP8_BOOL_DECODER_PTR_MASK);
}
DECLARE_ALIGNED(16, extern const unsigned int, vp8dx_bitreader_norm[256]);
/* wrapper functions to hide RTCD. static means inline means hopefully no
@ -147,12 +108,34 @@ static int vp8dx_start_decode(BOOL_DECODER *br,
#endif
return DBOOLHUFF_INVOKE(rtcd, start)(br, source, source_sz);
}
/* Wrapper: hands br to whatever DBOOLHUFF_INVOKE(br->rtcd, stop) selects. */
static void vp8dx_stop_decode(BOOL_DECODER *br)
{
    DBOOLHUFF_INVOKE(br->rtcd, stop)(br);
}
/* Wrapper: hands br to whatever DBOOLHUFF_INVOKE(br->rtcd, fill) selects. */
static void vp8dx_bool_decoder_fill(BOOL_DECODER *br)
{
    DBOOLHUFF_INVOKE(br->rtcd, fill)(br);
}
/* Shared refill loop for the bool decoder.
 *
 * Starting at bit position VP8_BD_VALUE_SIZE - 8 - (_count + 8) and stepping
 * down 8 bits at a time while that position is non-negative, the next byte at
 * _bufptr is ORed into _value at the current position, _bufptr is advanced
 * and _count grows by 8. If _bufptr reaches _bufend first, _count is set to
 * VP8_LOTS_OF_BITS and the loop stops.
 *
 * This is a macro rather than an inline function so that every user runs the
 * same loop on its own local copies of the decoder fields; with a function
 * the compiler was found not to eliminate the stores to, and reloads from,
 * the BOOL_DECODER fields, which carries a significant penalty.
 */
#define VP8DX_BOOL_DECODER_FILL(_count,_value,_bufptr,_bufend) \
    do \
    { \
        int vp8_fill_pos = VP8_BD_VALUE_SIZE - 8 - ((_count) + 8); \
        while(vp8_fill_pos >= 0) \
        { \
            if((_bufptr) >= (_bufend)) \
            { \
                (_count) = VP8_LOTS_OF_BITS; \
                break; \
            } \
            (_count) += 8; \
            (_value) |= (VP8_BD_VALUE)*(_bufptr)++ << vp8_fill_pos; \
            vp8_fill_pos -= 8; \
        } \
    } \
    while(0)
static int vp8dx_decode_bool(BOOL_DECODER *br, int probability) {
/*
* Until optimized versions of this function are available, we
@ -161,13 +144,18 @@ static int vp8dx_decode_bool(BOOL_DECODER *br, int probability) {
*return DBOOLHUFF_INVOKE(br->rtcd, debool)(br, probability);
*/
unsigned int bit = 0;
VP8_BD_VALUE value;
unsigned int split;
unsigned int bigsplit;
register unsigned int range = br->range;
register unsigned int value = br->value;
VP8_BD_VALUE bigsplit;
int count;
unsigned int range;
value = br->value;
count = br->count;
range = br->range;
split = 1 + (((range - 1) * probability) >> 8);
bigsplit = (split << 8);
bigsplit = (VP8_BD_VALUE)split << (VP8_BD_VALUE_SIZE - 8);
range = split;
@ -186,23 +174,16 @@ static int vp8dx_decode_bool(BOOL_DECODER *br, int probability) {
}*/
{
int count = br->count;
register unsigned int shift = vp8dx_bitreader_norm[range];
range <<= shift;
value <<= shift;
count -= shift;
if (count <= 0)
{
value |= (*br->read_ptr) << (-count);
br->read_ptr = br_ptr_advance(br->read_ptr, 1);
count += 8 ;
}
br->count = count;
}
br->value = value;
br->count = count;
br->range = range;
if(count < 0)
vp8dx_bool_decoder_fill(br);
return bit;
}

View File

@ -172,8 +172,6 @@ void vp8_decode_mode_mvs(VP8D_COMP *pbi)
MACROBLOCKD *xd = &pbi->mb;
mbmi->need_to_clamp_mvs = 0;
vp8dx_bool_decoder_fill(bc);
// Distance of Mb to the various image edges.
// These specified to 8th pel as they are always compared to MV values that are in 1/8th pel units
xd->mb_to_left_edge = -((mb_col * 16) << 3);

View File

@ -455,7 +455,6 @@ void vp8_decode_mb_row(VP8D_COMP *pbi,
else
pbi->debugoutput =0;
*/
vp8dx_bool_decoder_fill(xd->current_bc);
vp8_decode_macroblock(pbi, xd);
@ -563,18 +562,7 @@ static void stop_token_decoder(VP8D_COMP *pbi)
VP8_COMMON *pc = &pbi->common;
if (pc->multi_token_partition != ONE_PARTITION)
{
int num_part = (1 << pc->multi_token_partition);
for (i = 0; i < num_part; i++)
{
vp8dx_stop_decode(&pbi->mbc[i]);
}
vpx_free(pbi->mbc);
}
else
vp8dx_stop_decode(& pbi->bc2);
}
static void init_frame(VP8D_COMP *pbi)
@ -883,7 +871,6 @@ int vp8_decode_frame(VP8D_COMP *pbi)
}
vp8dx_bool_decoder_fill(bc);
{
// read coef probability tree
@ -970,8 +957,6 @@ int vp8_decode_frame(VP8D_COMP *pbi)
stop_token_decoder(pbi);
vp8dx_stop_decode(bc);
// vpx_log("Decoder: Frame Decoded, Size Roughly:%d bytes \n",bc->pos+pbi->bc2.pos);
// If this was a kf or Gf note the Q used

View File

@ -80,7 +80,6 @@ void vp8_kfread_modes(VP8D_COMP *pbi)
{
MB_PREDICTION_MODE y_mode;
vp8dx_bool_decoder_fill(bc);
// Read the Macroblock segmentation map if it is being updated explicitly this frame (reset to 0 above by default)
// By default on a key frame reset all MBs to segment 0
m->mbmi.segment_id = 0;

View File

@ -15,7 +15,6 @@
#include "vpx_mem/vpx_mem.h"
#include "vpx_ports/mem.h"
#define BR_COUNT 8
#define BOOL_DATA UINT8
#define OCB_X PREV_COEF_CONTEXTS * ENTROPY_NODES
@ -105,6 +104,10 @@ void vp8_reset_mb_tokens_context(MACROBLOCKD *x)
}
}
DECLARE_ALIGNED(16, extern const unsigned int, vp8dx_bitreader_norm[256]);
/* Refill value from the byte stream once count has dropped below zero.
 *
 * Expects count, value, bufptr and bufend to be in scope at the use site and
 * is written without a trailing semicolon there. The expansion is a single
 * brace-enclosed statement so that an `else` following the macro cannot
 * attach to the `if` inside it.
 */
#define FILL \
    { \
        if(count < 0) \
            VP8DX_BOOL_DECODER_FILL(count, value, bufptr, bufend); \
    }
#define NORMALIZE \
/*if(range < 0x80)*/ \
{ \
@ -112,17 +115,13 @@ DECLARE_ALIGNED(16, extern const unsigned int, vp8dx_bitreader_norm[256]);
range <<= shift; \
value <<= shift; \
count -= shift; \
if(count <= 0) \
{ \
count += BR_COUNT ; \
value |= (*bufptr) << (BR_COUNT-count); \
bufptr = br_ptr_advance(bufptr, 1); \
} \
}
#define DECODE_AND_APPLYSIGN(value_to_sign) \
split = (range + 1) >> 1; \
if ( (value >> 8) < split ) \
bigsplit = (VP8_BD_VALUE)split << (VP8_BD_VALUE_SIZE - 8); \
FILL \
if ( value < bigsplit ) \
{ \
range = split; \
v= value_to_sign; \
@ -130,28 +129,25 @@ DECLARE_ALIGNED(16, extern const unsigned int, vp8dx_bitreader_norm[256]);
else \
{ \
range = range-split; \
value = value-(split<<8); \
value = value-bigsplit; \
v = -value_to_sign; \
} \
range +=range; \
value +=value; \
if (!--count) \
{ \
count = BR_COUNT; \
value |= *bufptr; \
bufptr = br_ptr_advance(bufptr, 1); \
}
count--;
/* Decode one symbol against `probability` using the enclosing function's
 * range/value/split/bigsplit locals: when value is below the split threshold,
 * range becomes split, NORMALIZE runs and control jumps to `branch`;
 * otherwise the threshold is subtracted from value, split from range, and
 * NORMALIZE runs before falling through.
 *
 * NOTE(review): this text comes from a diff view whose +/- markers are gone.
 * Both the `(value >> 8) < split` / `split<<8` form and the `bigsplit` form
 * appear below, so old and new lines are presumably interleaved -- confirm
 * against the repository which lines are current.
 */
#define DECODE_AND_BRANCH_IF_ZERO(probability,branch) \
{ \
split = 1 + ((( probability*(range-1) ) )>> 8); \
if ( (value >> 8) < split ) \
bigsplit = (VP8_BD_VALUE)split << (VP8_BD_VALUE_SIZE - 8); \
FILL \
if ( value < bigsplit ) \
{ \
range = split; \
NORMALIZE \
goto branch; \
} \
value -= (split<<8); \
value -= bigsplit; \
range = range - split; \
NORMALIZE \
}
@ -159,7 +155,9 @@ DECLARE_ALIGNED(16, extern const unsigned int, vp8dx_bitreader_norm[256]);
#define DECODE_AND_LOOP_IF_ZERO(probability,branch) \
{ \
split = 1 + ((( probability*(range-1) ) ) >> 8); \
if ( (value >> 8) < split ) \
bigsplit = (VP8_BD_VALUE)split << (VP8_BD_VALUE_SIZE - 8); \
FILL \
if ( value < bigsplit ) \
{ \
range = split; \
NORMALIZE \
@ -170,7 +168,7 @@ DECLARE_ALIGNED(16, extern const unsigned int, vp8dx_bitreader_norm[256]);
goto branch; \
} goto BLOCK_FINISHED; /*for malformed input */\
} \
value -= (split<<8); \
value -= bigsplit; \
range = range - split; \
NORMALIZE \
}
@ -188,10 +186,12 @@ DECLARE_ALIGNED(16, extern const unsigned int, vp8dx_bitreader_norm[256]);
#define DECODE_EXTRABIT_AND_ADJUST_VAL(t,bits_count)\
split = 1 + (((range-1) * vp8d_token_extra_bits2[t].Probs[bits_count]) >> 8); \
if(value >= (split<<8))\
bigsplit = (VP8_BD_VALUE)split << (VP8_BD_VALUE_SIZE - 8); \
FILL \
if(value >= bigsplit)\
{\
range = range-split;\
value = value-(split<<8);\
value = value-bigsplit;\
val += ((UINT16)1<<bits_count);\
}\
else\
@ -217,11 +217,13 @@ int vp8_decode_mb_tokens(VP8D_COMP *dx, MACROBLOCKD *x)
register int count;
const BOOL_DATA *bufptr;
const BOOL_DATA *bufend;
register unsigned int range;
register unsigned int value;
VP8_BD_VALUE value;
const int *scan;
register unsigned int shift;
UINT32 split;
VP8_BD_VALUE bigsplit;
INT16 *qcoeff_ptr;
const vp8_prob *coef_probs;
@ -253,10 +255,11 @@ int vp8_decode_mb_tokens(VP8D_COMP *dx, MACROBLOCKD *x)
qcoeff_ptr = &x->qcoeff[0];
}
bufend = bc->user_buffer_end;
bufptr = bc->user_buffer;
value = bc->value;
count = bc->count;
range = bc->range;
value = bc->value;
bufptr = bc->read_ptr;
coef_probs = oc->fc.coef_probs [type] [ 0 ] [0];
@ -384,10 +387,11 @@ BLOCK_FINISHED:
goto BLOCK_LOOP;
}
bc->count = count;
FILL
bc->user_buffer = bufptr;
bc->value = value;
bc->count = count;
bc->range = range;
bc->read_ptr = bufptr;
return eobtotal;
}

View File

@ -24,7 +24,6 @@ void vp8_dmachine_specific_config(VP8D_COMP *pbi)
pbi->dequant.idct = vp8_dequant_idct_c;
pbi->dequant.idct_dc = vp8_dequant_dc_idct_c;
pbi->dboolhuff.start = vp8dx_start_decode_c;
pbi->dboolhuff.stop = vp8dx_stop_decode_c;
pbi->dboolhuff.fill = vp8dx_bool_decoder_fill_c;
#if 0 //For use with RTCD, when implemented
pbi->dboolhuff.debool = vp8dx_decode_bool_c;

View File

@ -202,7 +202,6 @@ THREAD_FUNCTION vp8_thread_decoding_proc(void *p_data)
vp8_build_uvmvs(xd, pc->full_pixel);
vp8dx_bool_decoder_fill(xd->current_bc);
vp8_decode_macroblock(pbi, xd);