Change bitreader to use a larger window.

Change bitreading functions to use a larger window which is refilled less
 often.

This makes it cheap enough to do bounds checking each time the window is
 refilled, which avoids the need to copy the input into a large circular
 buffer.
This uses less memory and speeds up the total decode time by 1.6% on an ARM11,
 2.8% on a Cortex A8, and 2.2% on x86-32, but less than 1% on x86-64.

Inlining vp8dx_bool_decoder_fill() has a big penalty on x86-32, as does moving
 the refill loop to the front of vp8dx_decode_bool().
However, having the refill loop between computation of the split values and
 the branch in vp8_decode_mb_tokens() is a big win on ARM (presumably due to
 memory latency and code size: refilling after normalization duplicates the
 code in the DECODE_AND_BRANCH_IF_ZERO and DECODE_AND_LOOP_IF_ZERO cases).
Unfortunately, refilling at the end of vp8dx_decode_bool() and at the
 beginning of each decode step in vp8_decode_mb_tokens() means the latter
 requires an extra refill at the end.
Platform-specific versions could avoid the problem, but would require most of
 detokenize.c to be duplicated.

Change-Id: I16c782a63376f2a15b78f8086d899b987204c1c7
This commit is contained in:
Timothy B. Terriberry 2010-05-05 17:58:19 -04:00
parent ec1871554b
commit c17b62e1bd
11 changed files with 114 additions and 206 deletions

View File

@ -70,15 +70,11 @@ DEFINE(detok_current_bc, offsetof(DETOK, current_bc));
DEFINE(detok_coef_probs, offsetof(DETOK, coef_probs)); DEFINE(detok_coef_probs, offsetof(DETOK, coef_probs));
DEFINE(detok_eob, offsetof(DETOK, eob)); DEFINE(detok_eob, offsetof(DETOK, eob));
DEFINE(bool_decoder_lowvalue, offsetof(BOOL_DECODER, lowvalue)); DEFINE(bool_decoder_user_buffer_end, offsetof(BOOL_DECODER, user_buffer_end));
DEFINE(bool_decoder_range, offsetof(BOOL_DECODER, range)); DEFINE(bool_decoder_user_buffer, offsetof(BOOL_DECODER, user_buffer));
DEFINE(bool_decoder_value, offsetof(BOOL_DECODER, value)); DEFINE(bool_decoder_value, offsetof(BOOL_DECODER, value));
DEFINE(bool_decoder_count, offsetof(BOOL_DECODER, count)); DEFINE(bool_decoder_count, offsetof(BOOL_DECODER, count));
DEFINE(bool_decoder_user_buffer, offsetof(BOOL_DECODER, user_buffer)); DEFINE(bool_decoder_range, offsetof(BOOL_DECODER, range));
DEFINE(bool_decoder_user_buffer_sz, offsetof(BOOL_DECODER, user_buffer_sz));
DEFINE(bool_decoder_decode_buffer, offsetof(BOOL_DECODER, decode_buffer));
DEFINE(bool_decoder_read_ptr, offsetof(BOOL_DECODER, read_ptr));
DEFINE(bool_decoder_write_ptr, offsetof(BOOL_DECODER, write_ptr));
DEFINE(tokenextrabits_min_val, offsetof(TOKENEXTRABITS, min_val)); DEFINE(tokenextrabits_min_val, offsetof(TOKENEXTRABITS, min_val));
DEFINE(tokenextrabits_length, offsetof(TOKENEXTRABITS, Length)); DEFINE(tokenextrabits_length, offsetof(TOKENEXTRABITS, Length));

View File

@ -16,9 +16,6 @@
#undef vp8_dbool_start #undef vp8_dbool_start
#define vp8_dbool_start vp8dx_start_decode_v6 #define vp8_dbool_start vp8dx_start_decode_v6
#undef vp8_dbool_stop
#define vp8_dbool_stop vp8dx_stop_decode_v6
#undef vp8_dbool_fill #undef vp8_dbool_fill
#define vp8_dbool_fill vp8_bool_decoder_fill_v6 #define vp8_dbool_fill vp8_bool_decoder_fill_v6
@ -33,9 +30,6 @@
#undef vp8_dbool_start #undef vp8_dbool_start
#define vp8_dbool_start vp8dx_start_decode_neon #define vp8_dbool_start vp8dx_start_decode_neon
#undef vp8_dbool_stop
#define vp8_dbool_stop vp8dx_stop_decode_neon
#undef vp8_dbool_fill #undef vp8_dbool_fill
#define vp8_dbool_fill vp8_bool_decoder_fill_neon #define vp8_dbool_fill vp8_bool_decoder_fill_neon

View File

@ -26,7 +26,6 @@ void vp8_dmachine_specific_config(VP8D_COMP *pbi)
pbi->dequant.idct = vp8_dequant_idct_neon; pbi->dequant.idct = vp8_dequant_idct_neon;
pbi->dequant.idct_dc = vp8_dequant_dc_idct_neon; pbi->dequant.idct_dc = vp8_dequant_dc_idct_neon;
pbi->dboolhuff.start = vp8dx_start_decode_c; pbi->dboolhuff.start = vp8dx_start_decode_c;
pbi->dboolhuff.stop = vp8dx_stop_decode_c;
pbi->dboolhuff.fill = vp8dx_bool_decoder_fill_c; pbi->dboolhuff.fill = vp8dx_bool_decoder_fill_c;
pbi->dboolhuff.debool = vp8dx_decode_bool_c; pbi->dboolhuff.debool = vp8dx_decode_bool_c;
pbi->dboolhuff.devalue = vp8dx_decode_value_c; pbi->dboolhuff.devalue = vp8dx_decode_value_c;
@ -36,7 +35,6 @@ void vp8_dmachine_specific_config(VP8D_COMP *pbi)
pbi->dequant.idct = vp8_dequant_idct_v6; pbi->dequant.idct = vp8_dequant_idct_v6;
pbi->dequant.idct_dc = vp8_dequant_dc_idct_v6; pbi->dequant.idct_dc = vp8_dequant_dc_idct_v6;
pbi->dboolhuff.start = vp8dx_start_decode_c; pbi->dboolhuff.start = vp8dx_start_decode_c;
pbi->dboolhuff.stop = vp8dx_stop_decode_c;
pbi->dboolhuff.fill = vp8dx_bool_decoder_fill_c; pbi->dboolhuff.fill = vp8dx_bool_decoder_fill_c;
pbi->dboolhuff.debool = vp8dx_decode_bool_c; pbi->dboolhuff.debool = vp8dx_decode_bool_c;
pbi->dboolhuff.devalue = vp8dx_decode_value_c; pbi->dboolhuff.devalue = vp8dx_decode_value_c;

View File

@ -26,86 +26,41 @@ DECLARE_ALIGNED(16, const unsigned int, vp8dx_bitreader_norm[256]) =
}; };
static void copy_in(BOOL_DECODER *br, unsigned int to_write)
{
if (to_write > br->user_buffer_sz)
to_write = br->user_buffer_sz;
memcpy(br->write_ptr, br->user_buffer, to_write);
br->user_buffer += to_write;
br->user_buffer_sz -= to_write;
br->write_ptr = br_ptr_advance(br->write_ptr, to_write);
}
int vp8dx_start_decode_c(BOOL_DECODER *br, const unsigned char *source, int vp8dx_start_decode_c(BOOL_DECODER *br, const unsigned char *source,
unsigned int source_sz) unsigned int source_sz)
{ {
br->lowvalue = 0; br->user_buffer_end = source+source_sz;
br->range = 255;
br->count = 0;
br->user_buffer = source; br->user_buffer = source;
br->user_buffer_sz = source_sz; br->value = 0;
br->count = -8;
br->range = 255;
if (source_sz && !source) if (source_sz && !source)
return 1; return 1;
/* Allocate the ring buffer backing store with alignment equal to the
* buffer size*2 so that a single pointer can be used for wrapping rather
* than a pointer+offset.
*/
br->decode_buffer = vpx_memalign(VP8_BOOL_DECODER_SZ * 2,
VP8_BOOL_DECODER_SZ);
if (!br->decode_buffer)
return 1;
/* Populate the buffer */ /* Populate the buffer */
br->read_ptr = br->decode_buffer; vp8dx_bool_decoder_fill_c(br);
br->write_ptr = br->decode_buffer;
copy_in(br, VP8_BOOL_DECODER_SZ);
/* Read the first byte */
br->value = (*br->read_ptr++) << 8;
return 0; return 0;
} }
void vp8dx_bool_decoder_fill_c(BOOL_DECODER *br) void vp8dx_bool_decoder_fill_c(BOOL_DECODER *br)
{ {
int left, right; const unsigned char *bufptr;
const unsigned char *bufend;
VP8_BD_VALUE value;
int count;
bufend = br->user_buffer_end;
bufptr = br->user_buffer;
value = br->value;
count = br->count;
/* Find available room in the buffer */ VP8DX_BOOL_DECODER_FILL(count, value, bufptr, bufend);
left = 0;
right = br->read_ptr - br->write_ptr;
if (right < 0) br->user_buffer = bufptr;
{ br->value = value;
/* Read pointer is behind the write pointer. We can write from the br->count = count;
* write pointer to the end of the buffer.
*/
right = VP8_BOOL_DECODER_SZ - (br->write_ptr - br->decode_buffer);
left = br->read_ptr - br->decode_buffer;
}
if (right + left < 128)
return;
if (right)
copy_in(br, right);
if (left)
{
br->write_ptr = br->decode_buffer;
copy_in(br, left);
}
}
void vp8dx_stop_decode_c(BOOL_DECODER *bc)
{
vpx_free(bc->decode_buffer);
bc->decode_buffer = 0;
} }
#if 0 #if 0
@ -120,13 +75,18 @@ void vp8dx_stop_decode_c(BOOL_DECODER *bc)
int vp8dx_decode_bool_c(BOOL_DECODER *br, int probability) int vp8dx_decode_bool_c(BOOL_DECODER *br, int probability)
{ {
unsigned int bit=0; unsigned int bit=0;
VP8_BD_VALUE value;
unsigned int split; unsigned int split;
unsigned int bigsplit; VP8_BD_VALUE bigsplit;
register unsigned int range = br->range; int count;
register unsigned int value = br->value; unsigned int range;
value = br->value;
count = br->count;
range = br->range;
split = 1 + (((range-1) * probability) >> 8); split = 1 + (((range-1) * probability) >> 8);
bigsplit = (split<<8); bigsplit = (VP8_BD_VALUE)split << (VP8_BD_VALUE_SIZE - 8);
range = split; range = split;
if(value >= bigsplit) if(value >= bigsplit)
@ -144,21 +104,16 @@ int vp8dx_decode_bool_c(BOOL_DECODER *br, int probability)
}*/ }*/
{ {
int count = br->count;
register unsigned int shift = vp8dx_bitreader_norm[range]; register unsigned int shift = vp8dx_bitreader_norm[range];
range <<= shift; range <<= shift;
value <<= shift; value <<= shift;
count -= shift; count -= shift;
if(count <= 0)
{
value |= (*br->read_ptr) << (-count);
br->read_ptr = br_ptr_advance(br->read_ptr, 1);
count += 8 ;
}
br->count = count;
} }
br->value = value; br->value = value;
br->count = count;
br->range = range; br->range = range;
if (count < 0)
vp8dx_bool_decoder_fill_c(br);
return bit; return bit;
} }

View File

@ -11,51 +11,31 @@
#ifndef DBOOLHUFF_H #ifndef DBOOLHUFF_H
#define DBOOLHUFF_H #define DBOOLHUFF_H
#include <stddef.h>
#include <limits.h>
#include "vpx_ports/config.h" #include "vpx_ports/config.h"
#include "vpx_ports/mem.h" #include "vpx_ports/mem.h"
#include "vpx/vpx_integer.h" #include "vpx/vpx_integer.h"
/* Size of the bool decoder backing storage typedef size_t VP8_BD_VALUE;
*
* This size was chosen to be greater than the worst case encoding of a # define VP8_BD_VALUE_SIZE ((int)sizeof(VP8_BD_VALUE)*CHAR_BIT)
* single macroblock. This was calcluated as follows (python): /*This is meant to be a large, positive constant that can still be efficiently
* loaded as an immediate (on platforms like ARM, for example).
* def max_cost(prob): Even relatively modest values like 100 would work fine.*/
* return max(prob_costs[prob], prob_costs[255-prob]) / 256; # define VP8_LOTS_OF_BITS (0x40000000)
*
* tree_nodes_cost = 7 * max_cost(255)
* extra_bits_cost = sum([max_cost(bit) for bit in extra_bits])
* sign_bit_cost = max_cost(128)
* total_cost = tree_nodes_cost + extra_bits_cost + sign_bit_cost
*
* where the prob_costs table was taken from the C vp8_prob_cost table in
* boolhuff.c and the extra_bits table was taken from the 11 extrabits for
* a category 6 token as defined in vp8d_token_extra_bits2/detokenize.c
*
* This equation produced a maximum of 79 bits per coefficient. Scaling up
* to the macroblock level:
*
* 79 bits/coeff * 16 coeff/block * 25 blocks/macroblock = 31600 b/mb
*
* 4096 bytes = 32768 bits > 31600
*/
#define VP8_BOOL_DECODER_SZ 4096
#define VP8_BOOL_DECODER_MASK (VP8_BOOL_DECODER_SZ-1)
#define VP8_BOOL_DECODER_PTR_MASK (~(uintptr_t)(VP8_BOOL_DECODER_SZ))
struct vp8_dboolhuff_rtcd_vtable; struct vp8_dboolhuff_rtcd_vtable;
typedef struct typedef struct
{ {
unsigned int lowvalue; const unsigned char *user_buffer_end;
unsigned int range;
unsigned int value;
int count;
const unsigned char *user_buffer; const unsigned char *user_buffer;
unsigned int user_buffer_sz; VP8_BD_VALUE value;
unsigned char *decode_buffer; int count;
const unsigned char *read_ptr; unsigned int range;
unsigned char *write_ptr;
#if CONFIG_RUNTIME_CPU_DETECT #if CONFIG_RUNTIME_CPU_DETECT
struct vp8_dboolhuff_rtcd_vtable *rtcd; struct vp8_dboolhuff_rtcd_vtable *rtcd;
#endif #endif
@ -63,7 +43,6 @@ typedef struct
#define prototype_dbool_start(sym) int sym(BOOL_DECODER *br, \ #define prototype_dbool_start(sym) int sym(BOOL_DECODER *br, \
const unsigned char *source, unsigned int source_sz) const unsigned char *source, unsigned int source_sz)
#define prototype_dbool_stop(sym) void sym(BOOL_DECODER *bc)
#define prototype_dbool_fill(sym) void sym(BOOL_DECODER *br) #define prototype_dbool_fill(sym) void sym(BOOL_DECODER *br)
#define prototype_dbool_debool(sym) int sym(BOOL_DECODER *br, int probability) #define prototype_dbool_debool(sym) int sym(BOOL_DECODER *br, int probability)
#define prototype_dbool_devalue(sym) int sym(BOOL_DECODER *br, int bits); #define prototype_dbool_devalue(sym) int sym(BOOL_DECODER *br, int bits);
@ -76,10 +55,6 @@ typedef struct
#define vp8_dbool_start vp8dx_start_decode_c #define vp8_dbool_start vp8dx_start_decode_c
#endif #endif
#ifndef vp8_dbool_stop
#define vp8_dbool_stop vp8dx_stop_decode_c
#endif
#ifndef vp8_dbool_fill #ifndef vp8_dbool_fill
#define vp8_dbool_fill vp8dx_bool_decoder_fill_c #define vp8_dbool_fill vp8dx_bool_decoder_fill_c
#endif #endif
@ -93,20 +68,17 @@ typedef struct
#endif #endif
extern prototype_dbool_start(vp8_dbool_start); extern prototype_dbool_start(vp8_dbool_start);
extern prototype_dbool_stop(vp8_dbool_stop);
extern prototype_dbool_fill(vp8_dbool_fill); extern prototype_dbool_fill(vp8_dbool_fill);
extern prototype_dbool_debool(vp8_dbool_debool); extern prototype_dbool_debool(vp8_dbool_debool);
extern prototype_dbool_devalue(vp8_dbool_devalue); extern prototype_dbool_devalue(vp8_dbool_devalue);
typedef prototype_dbool_start((*vp8_dbool_start_fn_t)); typedef prototype_dbool_start((*vp8_dbool_start_fn_t));
typedef prototype_dbool_stop((*vp8_dbool_stop_fn_t));
typedef prototype_dbool_fill((*vp8_dbool_fill_fn_t)); typedef prototype_dbool_fill((*vp8_dbool_fill_fn_t));
typedef prototype_dbool_debool((*vp8_dbool_debool_fn_t)); typedef prototype_dbool_debool((*vp8_dbool_debool_fn_t));
typedef prototype_dbool_devalue((*vp8_dbool_devalue_fn_t)); typedef prototype_dbool_devalue((*vp8_dbool_devalue_fn_t));
typedef struct vp8_dboolhuff_rtcd_vtable { typedef struct vp8_dboolhuff_rtcd_vtable {
vp8_dbool_start_fn_t start; vp8_dbool_start_fn_t start;
vp8_dbool_stop_fn_t stop;
vp8_dbool_fill_fn_t fill; vp8_dbool_fill_fn_t fill;
vp8_dbool_debool_fn_t debool; vp8_dbool_debool_fn_t debool;
vp8_dbool_devalue_fn_t devalue; vp8_dbool_devalue_fn_t devalue;
@ -123,17 +95,6 @@ typedef struct vp8_dboolhuff_rtcd_vtable {
#define IF_RTCD(x) NULL #define IF_RTCD(x) NULL
//#endif //#endif
static unsigned char *br_ptr_advance(const unsigned char *_ptr,
unsigned int n)
{
uintptr_t ptr = (uintptr_t)_ptr;
ptr += n;
ptr &= VP8_BOOL_DECODER_PTR_MASK;
return (void *)ptr;
}
DECLARE_ALIGNED(16, extern const unsigned int, vp8dx_bitreader_norm[256]); DECLARE_ALIGNED(16, extern const unsigned int, vp8dx_bitreader_norm[256]);
/* wrapper functions to hide RTCD. static means inline means hopefully no /* wrapper functions to hide RTCD. static means inline means hopefully no
@ -147,12 +108,34 @@ static int vp8dx_start_decode(BOOL_DECODER *br,
#endif #endif
return DBOOLHUFF_INVOKE(rtcd, start)(br, source, source_sz); return DBOOLHUFF_INVOKE(rtcd, start)(br, source, source_sz);
} }
static void vp8dx_stop_decode(BOOL_DECODER *br) {
DBOOLHUFF_INVOKE(br->rtcd, stop)(br);
}
static void vp8dx_bool_decoder_fill(BOOL_DECODER *br) { static void vp8dx_bool_decoder_fill(BOOL_DECODER *br) {
DBOOLHUFF_INVOKE(br->rtcd, fill)(br); DBOOLHUFF_INVOKE(br->rtcd, fill)(br);
} }
/*Refills the bit window _value with whole bytes read from _bufptr.
The refill loop is used in several places, so define it in a macro to make
sure they're all consistent.
An inline function would be cleaner, but has a significant penalty, because
multiple BOOL_DECODER fields must be modified, and the compiler is not smart
enough to eliminate the stores to those fields and the subsequent reloads
from them when inlining the function.
_count:  Modifiable int; incremented by 8 for every byte loaded.
_value:  Modifiable window (callers pass a VP8_BD_VALUE); each new byte is
OR'd in at bit offset shift, which starts at
VP8_BD_VALUE_SIZE - 8 - (_count + 8) and drops by 8 per byte.
_bufptr: Modifiable read pointer; advanced past every byte consumed.
_bufend: One past the last readable byte; checked before every read.
Loading stops once shift becomes negative.
If _bufptr reaches _bufend first, _count is set to VP8_LOTS_OF_BITS and the
loop exits without reading, leaving the remaining low bits of _value as
they were.
Every argument is expanded more than once, so pass plain variables with no
side effects.*/
#define VP8DX_BOOL_DECODER_FILL(_count,_value,_bufptr,_bufend) \
do \
{ \
int shift; \
for(shift = VP8_BD_VALUE_SIZE - 8 - ((_count) + 8); shift >= 0; ) \
{ \
if((_bufptr) >= (_bufend)) { \
(_count) = VP8_LOTS_OF_BITS; \
break; \
} \
(_count) += 8; \
(_value) |= (VP8_BD_VALUE)*(_bufptr)++ << shift; \
shift -= 8; \
} \
} \
while(0)
static int vp8dx_decode_bool(BOOL_DECODER *br, int probability) { static int vp8dx_decode_bool(BOOL_DECODER *br, int probability) {
/* /*
* Until optimized versions of this function are available, we * Until optimized versions of this function are available, we
@ -161,13 +144,18 @@ static int vp8dx_decode_bool(BOOL_DECODER *br, int probability) {
*return DBOOLHUFF_INVOKE(br->rtcd, debool)(br, probability); *return DBOOLHUFF_INVOKE(br->rtcd, debool)(br, probability);
*/ */
unsigned int bit = 0; unsigned int bit = 0;
VP8_BD_VALUE value;
unsigned int split; unsigned int split;
unsigned int bigsplit; VP8_BD_VALUE bigsplit;
register unsigned int range = br->range; int count;
register unsigned int value = br->value; unsigned int range;
value = br->value;
count = br->count;
range = br->range;
split = 1 + (((range - 1) * probability) >> 8); split = 1 + (((range - 1) * probability) >> 8);
bigsplit = (split << 8); bigsplit = (VP8_BD_VALUE)split << (VP8_BD_VALUE_SIZE - 8);
range = split; range = split;
@ -186,23 +174,16 @@ static int vp8dx_decode_bool(BOOL_DECODER *br, int probability) {
}*/ }*/
{ {
int count = br->count;
register unsigned int shift = vp8dx_bitreader_norm[range]; register unsigned int shift = vp8dx_bitreader_norm[range];
range <<= shift; range <<= shift;
value <<= shift; value <<= shift;
count -= shift; count -= shift;
if (count <= 0)
{
value |= (*br->read_ptr) << (-count);
br->read_ptr = br_ptr_advance(br->read_ptr, 1);
count += 8 ;
}
br->count = count;
} }
br->value = value; br->value = value;
br->count = count;
br->range = range; br->range = range;
if(count < 0)
vp8dx_bool_decoder_fill(br);
return bit; return bit;
} }

View File

@ -172,8 +172,6 @@ void vp8_decode_mode_mvs(VP8D_COMP *pbi)
MACROBLOCKD *xd = &pbi->mb; MACROBLOCKD *xd = &pbi->mb;
mbmi->need_to_clamp_mvs = 0; mbmi->need_to_clamp_mvs = 0;
vp8dx_bool_decoder_fill(bc);
// Distance of Mb to the various image edges. // Distance of Mb to the various image edges.
// These specified to 8th pel as they are always compared to MV values that are in 1/8th pel units // These specified to 8th pel as they are always compared to MV values that are in 1/8th pel units
xd->mb_to_left_edge = -((mb_col * 16) << 3); xd->mb_to_left_edge = -((mb_col * 16) << 3);

View File

@ -455,7 +455,6 @@ void vp8_decode_mb_row(VP8D_COMP *pbi,
else else
pbi->debugoutput =0; pbi->debugoutput =0;
*/ */
vp8dx_bool_decoder_fill(xd->current_bc);
vp8_decode_macroblock(pbi, xd); vp8_decode_macroblock(pbi, xd);
@ -563,18 +562,7 @@ static void stop_token_decoder(VP8D_COMP *pbi)
VP8_COMMON *pc = &pbi->common; VP8_COMMON *pc = &pbi->common;
if (pc->multi_token_partition != ONE_PARTITION) if (pc->multi_token_partition != ONE_PARTITION)
{
int num_part = (1 << pc->multi_token_partition);
for (i = 0; i < num_part; i++)
{
vp8dx_stop_decode(&pbi->mbc[i]);
}
vpx_free(pbi->mbc); vpx_free(pbi->mbc);
}
else
vp8dx_stop_decode(& pbi->bc2);
} }
static void init_frame(VP8D_COMP *pbi) static void init_frame(VP8D_COMP *pbi)
@ -883,7 +871,6 @@ int vp8_decode_frame(VP8D_COMP *pbi)
} }
vp8dx_bool_decoder_fill(bc);
{ {
// read coef probability tree // read coef probability tree
@ -970,8 +957,6 @@ int vp8_decode_frame(VP8D_COMP *pbi)
stop_token_decoder(pbi); stop_token_decoder(pbi);
vp8dx_stop_decode(bc);
// vpx_log("Decoder: Frame Decoded, Size Roughly:%d bytes \n",bc->pos+pbi->bc2.pos); // vpx_log("Decoder: Frame Decoded, Size Roughly:%d bytes \n",bc->pos+pbi->bc2.pos);
// If this was a kf or Gf note the Q used // If this was a kf or Gf note the Q used

View File

@ -80,7 +80,6 @@ void vp8_kfread_modes(VP8D_COMP *pbi)
{ {
MB_PREDICTION_MODE y_mode; MB_PREDICTION_MODE y_mode;
vp8dx_bool_decoder_fill(bc);
// Read the Macroblock segmentation map if it is being updated explicitly this frame (reset to 0 above by default) // Read the Macroblock segmentation map if it is being updated explicitly this frame (reset to 0 above by default)
// By default on a key frame reset all MBs to segment 0 // By default on a key frame reset all MBs to segment 0
m->mbmi.segment_id = 0; m->mbmi.segment_id = 0;

View File

@ -15,7 +15,6 @@
#include "vpx_mem/vpx_mem.h" #include "vpx_mem/vpx_mem.h"
#include "vpx_ports/mem.h" #include "vpx_ports/mem.h"
#define BR_COUNT 8
#define BOOL_DATA UINT8 #define BOOL_DATA UINT8
#define OCB_X PREV_COEF_CONTEXTS * ENTROPY_NODES #define OCB_X PREV_COEF_CONTEXTS * ENTROPY_NODES
@ -105,6 +104,10 @@ void vp8_reset_mb_tokens_context(MACROBLOCKD *x)
} }
} }
DECLARE_ALIGNED(16, extern const unsigned int, vp8dx_bitreader_norm[256]); DECLARE_ALIGNED(16, extern const unsigned int, vp8dx_bitreader_norm[256]);
#define FILL \
if(count < 0) \
VP8DX_BOOL_DECODER_FILL(count, value, bufptr, bufend);
#define NORMALIZE \ #define NORMALIZE \
/*if(range < 0x80)*/ \ /*if(range < 0x80)*/ \
{ \ { \
@ -112,17 +115,13 @@ DECLARE_ALIGNED(16, extern const unsigned int, vp8dx_bitreader_norm[256]);
range <<= shift; \ range <<= shift; \
value <<= shift; \ value <<= shift; \
count -= shift; \ count -= shift; \
if(count <= 0) \
{ \
count += BR_COUNT ; \
value |= (*bufptr) << (BR_COUNT-count); \
bufptr = br_ptr_advance(bufptr, 1); \
} \
} }
#define DECODE_AND_APPLYSIGN(value_to_sign) \ #define DECODE_AND_APPLYSIGN(value_to_sign) \
split = (range + 1) >> 1; \ split = (range + 1) >> 1; \
if ( (value >> 8) < split ) \ bigsplit = (VP8_BD_VALUE)split << (VP8_BD_VALUE_SIZE - 8); \
FILL \
if ( value < bigsplit ) \
{ \ { \
range = split; \ range = split; \
v= value_to_sign; \ v= value_to_sign; \
@ -130,28 +129,25 @@ DECLARE_ALIGNED(16, extern const unsigned int, vp8dx_bitreader_norm[256]);
else \ else \
{ \ { \
range = range-split; \ range = range-split; \
value = value-(split<<8); \ value = value-bigsplit; \
v = -value_to_sign; \ v = -value_to_sign; \
} \ } \
range +=range; \ range +=range; \
value +=value; \ value +=value; \
if (!--count) \ count--;
{ \
count = BR_COUNT; \
value |= *bufptr; \
bufptr = br_ptr_advance(bufptr, 1); \
}
#define DECODE_AND_BRANCH_IF_ZERO(probability,branch) \ #define DECODE_AND_BRANCH_IF_ZERO(probability,branch) \
{ \ { \
split = 1 + ((( probability*(range-1) ) )>> 8); \ split = 1 + ((( probability*(range-1) ) )>> 8); \
if ( (value >> 8) < split ) \ bigsplit = (VP8_BD_VALUE)split << (VP8_BD_VALUE_SIZE - 8); \
FILL \
if ( value < bigsplit ) \
{ \ { \
range = split; \ range = split; \
NORMALIZE \ NORMALIZE \
goto branch; \ goto branch; \
} \ } \
value -= (split<<8); \ value -= bigsplit; \
range = range - split; \ range = range - split; \
NORMALIZE \ NORMALIZE \
} }
@ -159,7 +155,9 @@ DECLARE_ALIGNED(16, extern const unsigned int, vp8dx_bitreader_norm[256]);
#define DECODE_AND_LOOP_IF_ZERO(probability,branch) \ #define DECODE_AND_LOOP_IF_ZERO(probability,branch) \
{ \ { \
split = 1 + ((( probability*(range-1) ) ) >> 8); \ split = 1 + ((( probability*(range-1) ) ) >> 8); \
if ( (value >> 8) < split ) \ bigsplit = (VP8_BD_VALUE)split << (VP8_BD_VALUE_SIZE - 8); \
FILL \
if ( value < bigsplit ) \
{ \ { \
range = split; \ range = split; \
NORMALIZE \ NORMALIZE \
@ -170,7 +168,7 @@ DECLARE_ALIGNED(16, extern const unsigned int, vp8dx_bitreader_norm[256]);
goto branch; \ goto branch; \
} goto BLOCK_FINISHED; /*for malformed input */\ } goto BLOCK_FINISHED; /*for malformed input */\
} \ } \
value -= (split<<8); \ value -= bigsplit; \
range = range - split; \ range = range - split; \
NORMALIZE \ NORMALIZE \
} }
@ -188,10 +186,12 @@ DECLARE_ALIGNED(16, extern const unsigned int, vp8dx_bitreader_norm[256]);
#define DECODE_EXTRABIT_AND_ADJUST_VAL(t,bits_count)\ #define DECODE_EXTRABIT_AND_ADJUST_VAL(t,bits_count)\
split = 1 + (((range-1) * vp8d_token_extra_bits2[t].Probs[bits_count]) >> 8); \ split = 1 + (((range-1) * vp8d_token_extra_bits2[t].Probs[bits_count]) >> 8); \
if(value >= (split<<8))\ bigsplit = (VP8_BD_VALUE)split << (VP8_BD_VALUE_SIZE - 8); \
FILL \
if(value >= bigsplit)\
{\ {\
range = range-split;\ range = range-split;\
value = value-(split<<8);\ value = value-bigsplit;\
val += ((UINT16)1<<bits_count);\ val += ((UINT16)1<<bits_count);\
}\ }\
else\ else\
@ -217,11 +217,13 @@ int vp8_decode_mb_tokens(VP8D_COMP *dx, MACROBLOCKD *x)
register int count; register int count;
const BOOL_DATA *bufptr; const BOOL_DATA *bufptr;
const BOOL_DATA *bufend;
register unsigned int range; register unsigned int range;
register unsigned int value; VP8_BD_VALUE value;
const int *scan; const int *scan;
register unsigned int shift; register unsigned int shift;
UINT32 split; UINT32 split;
VP8_BD_VALUE bigsplit;
INT16 *qcoeff_ptr; INT16 *qcoeff_ptr;
const vp8_prob *coef_probs; const vp8_prob *coef_probs;
@ -253,10 +255,11 @@ int vp8_decode_mb_tokens(VP8D_COMP *dx, MACROBLOCKD *x)
qcoeff_ptr = &x->qcoeff[0]; qcoeff_ptr = &x->qcoeff[0];
} }
bufend = bc->user_buffer_end;
bufptr = bc->user_buffer;
value = bc->value;
count = bc->count; count = bc->count;
range = bc->range; range = bc->range;
value = bc->value;
bufptr = bc->read_ptr;
coef_probs = oc->fc.coef_probs [type] [ 0 ] [0]; coef_probs = oc->fc.coef_probs [type] [ 0 ] [0];
@ -384,10 +387,11 @@ BLOCK_FINISHED:
goto BLOCK_LOOP; goto BLOCK_LOOP;
} }
bc->count = count; FILL
bc->user_buffer = bufptr;
bc->value = value; bc->value = value;
bc->count = count;
bc->range = range; bc->range = range;
bc->read_ptr = bufptr;
return eobtotal; return eobtotal;
} }

View File

@ -24,7 +24,6 @@ void vp8_dmachine_specific_config(VP8D_COMP *pbi)
pbi->dequant.idct = vp8_dequant_idct_c; pbi->dequant.idct = vp8_dequant_idct_c;
pbi->dequant.idct_dc = vp8_dequant_dc_idct_c; pbi->dequant.idct_dc = vp8_dequant_dc_idct_c;
pbi->dboolhuff.start = vp8dx_start_decode_c; pbi->dboolhuff.start = vp8dx_start_decode_c;
pbi->dboolhuff.stop = vp8dx_stop_decode_c;
pbi->dboolhuff.fill = vp8dx_bool_decoder_fill_c; pbi->dboolhuff.fill = vp8dx_bool_decoder_fill_c;
#if 0 //For use with RTCD, when implemented #if 0 //For use with RTCD, when implemented
pbi->dboolhuff.debool = vp8dx_decode_bool_c; pbi->dboolhuff.debool = vp8dx_decode_bool_c;

View File

@ -202,7 +202,6 @@ THREAD_FUNCTION vp8_thread_decoding_proc(void *p_data)
vp8_build_uvmvs(xd, pc->full_pixel); vp8_build_uvmvs(xd, pc->full_pixel);
vp8dx_bool_decoder_fill(xd->current_bc);
vp8_decode_macroblock(pbi, xd); vp8_decode_macroblock(pbi, xd);