Compare commits

...

2 Commits

Author SHA1 Message Date
John Koleszar
4e132e2e45 change to 32k bucket cache
Change-Id: Ia2d6ae89068bd7eadf3c1bb07c58d7cce7b986bd
2011-12-09 11:51:19 -08:00
John Koleszar
3392c96aea basic lookup table working
Change-Id: I4b847355ff58e6a77fdc8a81135a29884a0be615
2011-12-07 11:03:16 -08:00
7 changed files with 283 additions and 5 deletions

View File

@@ -226,6 +226,7 @@ void vp8_mbpost_proc_across_ip_c(unsigned char *src, int pitch, int rows, int co
unsigned char d[16];
s += 8; cols -= 16;
for (r = 0; r < rows; r++)
{
int sumsq = 0;
@@ -277,6 +278,7 @@ void vp8_mbpost_proc_down_c(unsigned char *dst, int pitch, int rows, int cols, i
unsigned char d[16];
const short *rv2 = rv3 + ((c * 17) & 127);
s += 8 * pitch; rows -= 8;
for (i = -8; i <= 6; i++)
{
sumsq += s[i*pitch] * s[i*pitch];
@@ -315,7 +317,7 @@ static void vp8_deblock_and_de_macro_block(YV12_BUFFER_CONFIG *source,
POSTPROC_INVOKE(rtcd, downacross)(source->y_buffer, post->y_buffer, source->y_stride, post->y_stride, source->y_height, source->y_width, ppl);
POSTPROC_INVOKE(rtcd, across)(post->y_buffer, post->y_stride, post->y_height, post->y_width, q2mbl(q));
POSTPROC_INVOKE(rtcd, down)(post->y_buffer, post->y_stride, post->y_height, post->y_width, q2mbl(q));
//POSTPROC_INVOKE(rtcd, down)(post->y_buffer, post->y_stride, post->y_height, post->y_width, q2mbl(q));
POSTPROC_INVOKE(rtcd, downacross)(source->u_buffer, post->u_buffer, source->uv_stride, post->uv_stride, source->uv_height, source->uv_width, ppl);
POSTPROC_INVOKE(rtcd, downacross)(source->v_buffer, post->v_buffer, source->uv_stride, post->uv_stride, source->uv_height, source->uv_width, ppl);

View File

@@ -25,6 +25,8 @@
#include "defaultcoefcounts.h"
#define printf(...)
const int vp8cx_base_skip_false_prob[128] =
{
255, 255, 255, 255, 255, 255, 255, 255,
@@ -159,6 +161,227 @@ static void write_split(vp8_writer *bc, int x)
);
}
/* Upper bound (exclusive) on cache context values. pack_tokens_lut() builds a
 * context as (id << 7) + (range - 128), which is where the trailing factor
 * of 128 comes from. */
#define MAX_BOOL_CACHE_CONTEXTS ((2048+2*4*8*3*12)*128)
/* One packed cache entry; bool_cache() defines the bit layout. */
typedef unsigned long long bool_cache_entry_t;
/* First-level validity bitmap: one bit per group of 128 contexts
 * (indexed by context >> 7). */
static unsigned char bool_cache_validity1[MAX_BOOL_CACHE_CONTEXTS/128/8];
/* Second-level validity bitmap: one bit per individual context. */
static unsigned char bool_cache_validity2[MAX_BOOL_CACHE_CONTEXTS/8];
/* Number of table slots. Lookups mask the context with
 * (BOOL_CACHE_BUCKETS-1), so this has to remain a power of two. */
#define BOOL_CACHE_BUCKETS (32*1024)
/* Cache storage, indexed by the low bits of the context. */
static bool_cache_entry_t bool_cache_table[BOOL_CACHE_BUCKETS];
/*
 * Return bit number `n` (0 or 1) of the bitmap `vector`.
 * Bits are numbered least-significant first within each byte.
 */
static unsigned char bit_n(unsigned char *vector, int n)
{
    const int byte_index = n >> 3;
    const int bit_index = n & 7;

    return (vector[byte_index] >> bit_index) & 1;
}
/*
 * Set bit number `n` of the bitmap `vector` to 1.
 * Bits are numbered least-significant first within each byte; other bits
 * are left untouched.
 */
static void set_bit_n(unsigned char *vector, int n)
{
    unsigned char *target = vector + (n >> 3);

    *target |= 1 << (n & 7);
}
/* Lookup statistics: bool_cache_hit1 counts every lookup, bool_cache_hit2
 * counts the lookups that returned a stored entry. */
static int bool_cache_hit1;
static int bool_cache_hit2;

/*
 * Look up the cache entry stored for `context`.
 *
 * Returns the stored entry when both validity bitmaps mark the context as
 * present and the tag in the low 5 bits of the table slot equals
 * (context >> 15). Returns 0 in every other case.
 */
static bool_cache_entry_t bool_cache_hit(int context)
{
    bool_cache_entry_t cached;

    bool_cache_hit1++;
    assert(context < MAX_BOOL_CACHE_CONTEXTS);

    /* Both bitmap levels must agree that this context has been stored. */
    if (bit_n(bool_cache_validity1, context >> 7)
        && bit_n(bool_cache_validity2, context))
    {
        cached = bool_cache_table[context & (BOOL_CACHE_BUCKETS - 1)];

        /* Several contexts share a slot; the tag tells them apart. */
        if ((cached & 0x1F) == (context >> 15))
        {
            bool_cache_hit2++;
            return cached;
        }
    }

    return 0;
}
/*
 * Encode the low `n` bits of `v`, most significant bit first, into `w`.
 *
 * w      encoder to write to
 * v      value whose bits are encoded
 * probs  one probability per bit, consumed in order
 * n      number of bits to encode; nothing is written when n <= 0
 *
 * The original do/while form decremented `n` before testing it, so a call
 * with n == 0 shifted by a negative count (undefined behaviour) and never
 * terminated cleanly. The loop is now guarded up front. The decrement and
 * the pointer advance are also kept out of the call arguments, so the code
 * stays correct even if vp8_encode_bool evaluates an argument more than once.
 */
static void encode_bits_msb(vp8_writer *w,
                            int v,
                            const unsigned char *probs,
                            int n)
{
    while (n > 0)
    {
        const unsigned char prob = *probs++;

        --n;
        vp8_encode_bool(w, (v >> n) & 1, prob);
    }
}
/*
 * Store the state of the scratch encoder `bc` as the cache entry for
 * `context`.
 *
 * context  cache key, 0 <= context < MAX_BOOL_CACHE_CONTEXTS
 * bc       encoder whose lowvalue/range/count/pos (and first output byte,
 *          if one was produced) are packed into the entry
 *
 * Entry layout, least significant bits first:
 *   bits  0..4   tag (context >> 15), checked by bool_cache_hit()
 *   bits  5..11  bc->range - 128
 *   bits 12..17  shift_count
 *   bits 18..    bc->lowvalue, plus bc->buffer[0] placed above it when a
 *                byte has already been written
 */
static void bool_cache(int context, vp8_writer *bc)
{
    /* Index of the 128-context group this context belongs to. */
    const int group = context >> 7;
    int shift_count;
    bool_cache_entry_t entry;

    assert(context >= 0 && context < MAX_BOOL_CACHE_CONTEXTS);

    if (!bit_n(bool_cache_validity1, group))
    {
        /* This is the first time this top-level context has been hit.
         * Invalidate the second level contexts.
         *
         * A group owns 128 bits == 16 bytes starting at byte group * 16.
         * Starting the clear at context >> 3 (as was done previously)
         * begins mid-group: it leaves the group's leading bytes stale,
         * wipes bits owned by the following group, and for the last group
         * writes past the end of bool_cache_validity2.
         */
        memset(&bool_cache_validity2[group << 4], 0, 16);
    }
    set_bit_n(bool_cache_validity1, group);
    set_bit_n(bool_cache_validity2, context);

    /* Convert the bc into a cache entry */
    /* NOTE(review): presumably the number of bits the scratch encoder has
     * shifted since it was started (count appears to start at -24) --
     * confirm against vp8_start_encode. */
    shift_count = bc->count + 24 + 8 * bc->pos;
    entry = bc->lowvalue;
    if (bc->pos)
    {
        /* Only a single already-written byte can be folded into the entry. */
        assert(bc->pos < 2);
        entry += (unsigned long long)bc->buffer[0] << shift_count;
    }

    entry <<= 6;
    entry += shift_count;
    entry <<= 7;
    entry += bc->range - 128;
    entry <<= 5;
    entry += context >> 15;

    bool_cache_table[context & (BOOL_CACHE_BUCKETS-1)] = entry;
}
/*
 * Append a cached bitstream fragment to the encoder state `cx`.
 *
 * `to_splice` is unpacked from the low bits upwards: 5 bits are discarded
 * (bool_cache() stores a tag there), 7 bits give the new range minus 128,
 * 6 bits give a shift count, and the remaining high bits are added to
 * cx->lowvalue after it has been shifted left by that count. Any bytes that
 * become complete are written to cx->buffer at cx->pos; cx->range, cx->count
 * and cx->lowvalue are updated.
 */
static void splice_bits(BOOL_CODER *cx, bool_cache_entry_t to_splice)
{
unsigned int thismask;
int shift_count, count;
unsigned long long lowvalue = cx->lowvalue;
/* Drop the tag, then peel off range and shift count. */
to_splice >>= 5;
cx->range = (to_splice & 0x7F) + 128;
to_splice >>= 7;
shift_count = to_splice & 0x3F;
to_splice >>= 6;
/* What is left of to_splice is the fragment's low-value contribution. */
count = cx->count + shift_count;
lowvalue <<= shift_count;
lowvalue += to_splice;
/* A non-negative count means at least one byte is ready to be written. */
if(count >=0)
{
unsigned long long tmp = lowvalue >> count;
/* Mask applied to lowvalue once the bytes are out; it is derived from
 * count before the output loop below changes it. */
thismask = 0xffffff << (count & 7) | 255;
/* Anything above bit 31 is a carry: propagate it into the bytes already
 * written, turning trailing 0xff bytes into 0x00 on the way. */
if(tmp >> 32)
{
int x = cx->pos - 1;
while (x >= 0 && cx->buffer[x] == 0xff)
{
cx->buffer[x] = (unsigned char)0;
x--;
}
/* NOTE(review): if pos is 0 or every written byte is 0xff, x is -1 here
 * and this writes before the buffer -- confirm that cannot happen. */
cx->buffer[x] += 1;
}
/* Emit bits 24..31 of tmp as one byte per iteration until count goes
 * negative. */
while(count >= 0)
{
int out = (tmp >> 24) & 0xff;
cx->buffer[cx->pos++] = out;
count -= 8;
tmp <<= 8;
}
lowvalue &= thismask;
}
cx->count = count;
cx->lowvalue = lowvalue;
}
/*
 * Write `xcount` tokens starting at `p` into `w` using the lookup cache.
 *
 * For each token the key (p->context, current range of w) selects a cache
 * entry. On a miss the token's bits are run through a scratch encoder that
 * starts from w's range, the result is stored with bool_cache(), and the
 * entry is read back. The entry is then merged into `w` with splice_bits().
 * Extra bits are handled the same way under the key
 * b->base_val + (p->Extra >> 1). The sign bit is written directly with
 * vp8_write_bit.
 */
void pack_tokens_lut(vp8_writer *w, const TOKENEXTRA *p, int xcount)
{
/* For now, we read from the token array */
const TOKENEXTRA *const stop = p + xcount;
while (p < stop)
{
const vp8_extra_bit_struct *b;
int context;
bool_cache_entry_t entry;
/* Encode token */
/* Cache key: the token's context id combined with the live range
 * (assumes w->range lies in 128..255 -- TODO confirm). */
context = (p->context << 7) + (w->range - 128);
if (!(entry = bool_cache_hit(context)))
{
/* Build the probability vector */
int n, i, j, v;
unsigned char probs[12], buf[4];
vp8_writer tmpbc;
/* With skip_eob_node set, one fewer bit is coded and the tree walk
 * starts at index 2 instead of the root. */
if (p->skip_eob_node)
{
n = vp8_coef_encodings[p->Token].Len - 1;
i = 2;
}
else
{
n = vp8_coef_encodings[p->Token].Len;
i = 0;
}
v = vp8_coef_encodings[p->Token].value;
j=0;
/* Walk the coefficient tree along the token's code, recording the
 * probability of each node visited. */
do
{
const int bb = (v >> --n) & 1;
probs[j++] = p->context_tree[i>>1];
i = vp8_coef_tree[i+bb];
} while(n);
/* Generate the appropriate bitstream and cache the result */
vp8_start_encode(&tmpbc, buf, buf+4);
/* Start the scratch encoder from the live range so the stored result
 * matches this cache key. */
tmpbc.range = w->range;
encode_bits_msb(&tmpbc, v, probs, j);
bool_cache(context, &tmpbc);
/* Read back the entry that was just stored. */
entry = bool_cache_hit(context);
}
splice_bits(w, entry);
/* Encode extra bits */
b = vp8_extra_bits + p->Token;
if (b->Len)
{
/* NOTE(review): these keys share one key space with the token contexts
 * used above -- confirm the two ranges cannot overlap. */
context = b->base_val + (p->Extra >> 1);
context = (context << 7) + (w->range - 128);
if (!(entry = bool_cache_hit(context)))
{
unsigned char buf[4];
vp8_writer tmpbc;
/* Generate the appropriate bitstream and cache the result */
vp8_start_encode(&tmpbc, buf, buf+4);
tmpbc.range = w->range;
encode_bits_msb(&tmpbc, p->Extra >> 1, b->prob, b->Len);
bool_cache(context, &tmpbc);
entry = bool_cache_hit(context);
}
splice_bits(w, entry);
}
/* Encode sign bit */
/* The sign lives in bit 0 of Extra and is written without the cache. */
if (b->base_val)
vp8_write_bit(w, p->Extra & 1);
/* Next token */
/* NOTE(review): debug trace; %08x / %ld may not match the types of
 * lowvalue and pos -- confirm before enabling. */
printf("lv=%08x, pos=%ld\n",w->lowvalue,w->pos);
++p;
}
}
static void pack_tokens_c(vp8_writer *w, const TOKENEXTRA *p, int xcount)
{
const TOKENEXTRA *const stop = p + xcount;
@@ -187,6 +410,7 @@ static void pack_tokens_c(vp8_writer *w, const TOKENEXTRA *p, int xcount)
do
{
const int bb = (v >> --n) & 1;
//printf("E(%d)=%d (v=%d)\n",pp[i>>1],(v >> n) & 1,v);
split = 1 + (((range - 1) * pp[i>>1]) >> 8);
i = vp8_coef_tree[i+bb];
@@ -252,6 +476,7 @@ static void pack_tokens_c(vp8_writer *w, const TOKENEXTRA *p, int xcount)
do
{
const int bb = (v >> --n) & 1;
//printf("E(%d)=%d (v=%d)\n",pp[i>>1],(v >> n) & 1,v);
split = 1 + (((range - 1) * pp[i>>1]) >> 8);
i = b->tree[i+bb];
@@ -352,6 +577,7 @@ static void pack_tokens_c(vp8_writer *w, const TOKENEXTRA *p, int xcount)
}
printf("lv=%08x, pos=%ld\n",lowvalue,w->pos);
++p;
}
@@ -1328,7 +1554,7 @@ static int default_coef_context_savings(VP8_COMP *cpi)
MAX_ENTROPY_TOKENS, vp8_coef_encodings, vp8_coef_tree,
cpi->frame_coef_probs [i][j][k],
cpi->frame_branch_ct [i][j][k],
cpi->coef_counts [i][j][k],
default_coef_counts [i][j][k],
256, 1
);
@@ -1569,6 +1795,7 @@ static void put_delta_q(vp8_writer *bc, int delta_q)
vp8_write_bit(bc, 0);
}
char tmp[128*1024];
void vp8_pack_bitstream(VP8_COMP *cpi, unsigned char *dest, unsigned char * dest_end, unsigned long *size)
{
int i, j;
@@ -1931,7 +2158,22 @@ void vp8_pack_bitstream(VP8_COMP *cpi, unsigned char *dest, unsigned char * dest
pack_mb_row_tokens(cpi, &cpi->bc[1]);
else
#endif
pack_tokens(&cpi->bc[1], cpi->tok, cpi->tok_count);
{
vp8_writer tmpbc;
vp8_start_encode(&tmpbc, tmp, tmp+sizeof(tmp));
//pack_tokens_c(&tmpbc, cpi->tok, cpi->tok_count);
pack_tokens_lut(&cpi->bc[1], cpi->tok, cpi->tok_count);
//assert(cpi->bc[1].lowvalue == tmpbc.lowvalue);
//assert(cpi->bc[1].pos == tmpbc.pos);
}
//#undef printf
printf("cache hit ratio: %d/%d=%f\n",
bool_cache_hit2,
bool_cache_hit1,
bool_cache_hit2/(float)bool_cache_hit1);
vp8_stop_encode(&cpi->bc[1]);

View File

@@ -24,7 +24,7 @@
typedef struct
{
unsigned int lowvalue;
unsigned long long lowvalue;
unsigned int range;
unsigned int value;
int count;

View File

@@ -492,7 +492,7 @@ void vp8_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset,
// if we encode a new mv this is important
// find the best new motion vector
for (mode_index = 0; mode_index < MAX_MODES; mode_index++)
for (mode_index = 0; mode_index < 1; mode_index++)
{
int frame_cost;
int this_rd = INT_MAX;

View File

@@ -176,6 +176,7 @@ void vp8cx_pick_filter_level_fast(YV12_BUFFER_CONFIG *sd, VP8_COMP *cpi)
filt_val = cm->filter_level;
best_filt_val = filt_val;
goto skip;
// Get the err using the previous frame's filter value.
vp8_loop_filter_partial_frame(cm, &cpi->mb.e_mbd, filt_val);
@@ -246,6 +247,7 @@ void vp8cx_pick_filter_level_fast(YV12_BUFFER_CONFIG *sd, VP8_COMP *cpi)
}
}
skip:
cm->filter_level = best_filt_val;
if (cm->filter_level < min_filter_level)
@@ -311,6 +313,7 @@ void vp8cx_pick_filter_level(YV12_BUFFER_CONFIG *sd, VP8_COMP *cpi)
best_err = vp8_calc_ss_err(sd, cm->frame_to_show, IF_RTCD(&cpi->rtcd.variance));
filt_best = filt_mid;
goto skip;
// Re-instate the unfiltered frame
vp8_yv12_copy_y_ptr(&cpi->last_frame_uf, cm->frame_to_show);
@@ -379,5 +382,6 @@ void vp8cx_pick_filter_level(YV12_BUFFER_CONFIG *sd, VP8_COMP *cpi)
}
}
skip:
cm->filter_level = filt_best;
}

View File

@@ -93,6 +93,20 @@ static void fill_value_tokens()
vp8_dct_value_cost_ptr = dct_value_cost + DCT_MAX_VALUE;
}
/*
 * Compute the cache context id for token `t` and store it in t->context.
 *
 * The id is built from the position of t->context_tree relative to
 * cpi->common.fc.coef_probs[0][0][0] (in units of one innermost
 * probability array), multiplied by 12 and offset by the token value.
 * Tokens with skip_eob_node set are moved into a second range of size
 * 4*8*3*12, and every id is offset by 2048.
 */
static void calculate_context(VP8_COMP *cpi, TOKENEXTRA *t)
{
    /* Index of the probability array that t->context_tree points at. */
    const int slot = (t->context_tree - cpi->common.fc.coef_probs[0][0][0])
                     / sizeof(cpi->common.fc.coef_probs[0][0][0]);
    int id = 2048 + slot * 12 + t->Token;

    /* NOTE(review): 4*8*3*12 presumably mirrors the coef_probs dimensions
     * times the token count -- confirm against the table declaration. */
    if (t->skip_eob_node)
    {
        id += 4*8*3*12;
    }

    t->context = id;
}
static void tokenize2nd_order_b
(
MACROBLOCKD *x,
@@ -124,6 +138,7 @@ static void tokenize2nd_order_b
t->context_tree = cpi->common.fc.coef_probs [1] [0] [pt];
t->skip_eob_node = 0;
calculate_context(cpi, t);
++cpi->coef_counts [1] [0] [pt] [DCT_EOB_TOKEN];
t++;
*tp = t;
@@ -138,6 +153,7 @@ static void tokenize2nd_order_b
t->context_tree = cpi->common.fc.coef_probs [1] [0] [pt];
t->skip_eob_node = 0;
calculate_context(cpi, t);
++cpi->coef_counts [1] [0] [pt] [token];
pt = vp8_prev_token_class[token];
t++;
@@ -157,6 +173,7 @@ static void tokenize2nd_order_b
t->skip_eob_node = ((pt == 0));
calculate_context(cpi, t);
++cpi->coef_counts [1] [band] [pt] [token];
pt = vp8_prev_token_class[token];
@@ -170,6 +187,7 @@ static void tokenize2nd_order_b
t->skip_eob_node = 0;
calculate_context(cpi, t);
++cpi->coef_counts [1] [band] [pt] [DCT_EOB_TOKEN];
t++;
@@ -221,6 +239,7 @@ static void tokenize1st_order_b
t->context_tree = cpi->common.fc.coef_probs [type] [c] [pt];
t->skip_eob_node = 0;
calculate_context(cpi, t);
++cpi->coef_counts [type] [c] [pt] [DCT_EOB_TOKEN];
t++;
*tp = t;
@@ -236,6 +255,7 @@ static void tokenize1st_order_b
t->context_tree = cpi->common.fc.coef_probs [type] [c] [pt];
t->skip_eob_node = 0;
calculate_context(cpi, t);
++cpi->coef_counts [type] [c] [pt] [token];
pt = vp8_prev_token_class[token];
t++;
@@ -254,6 +274,7 @@ static void tokenize1st_order_b
t->context_tree = cpi->common.fc.coef_probs [type] [band] [pt];
t->skip_eob_node = (pt == 0);
calculate_context(cpi, t);
++cpi->coef_counts [type] [band] [pt] [token];
pt = vp8_prev_token_class[token];
@@ -266,6 +287,7 @@ static void tokenize1st_order_b
t->context_tree = cpi->common.fc.coef_probs [type] [band] [pt];
t->skip_eob_node = 0;
calculate_context(cpi, t);
++cpi->coef_counts [type] [band] [pt] [DCT_EOB_TOKEN];
t++;
@@ -292,6 +314,7 @@ static void tokenize1st_order_b
t->context_tree = cpi->common.fc.coef_probs [2] [0] [pt];
t->skip_eob_node = 0;
calculate_context(cpi, t);
++cpi->coef_counts [2] [0] [pt] [DCT_EOB_TOKEN];
t++;
*tp = t;
@@ -307,6 +330,7 @@ static void tokenize1st_order_b
t->context_tree = cpi->common.fc.coef_probs [2] [0] [pt];
t->skip_eob_node = 0;
calculate_context(cpi, t);
++cpi->coef_counts [2] [0] [pt] [token];
pt = vp8_prev_token_class[token];
t++;
@@ -326,6 +350,7 @@ static void tokenize1st_order_b
t->skip_eob_node = (pt == 0);
calculate_context(cpi, t);
++cpi->coef_counts [2] [band] [pt] [token];
pt = vp8_prev_token_class[token];
@@ -339,6 +364,7 @@ static void tokenize1st_order_b
t->skip_eob_node = 0;
calculate_context(cpi, t);
++cpi->coef_counts [2] [band] [pt] [DCT_EOB_TOKEN];
t++;
@@ -500,6 +526,7 @@ static __inline void stuff2nd_order_b
t->Token = DCT_EOB_TOKEN;
t->context_tree = cpi->common.fc.coef_probs [1] [0] [pt];
t->skip_eob_node = 0;
calculate_context(cpi, t);
++cpi->coef_counts [1] [0] [pt] [DCT_EOB_TOKEN];
++t;
@@ -524,6 +551,7 @@ static __inline void stuff1st_order_b
t->Token = DCT_EOB_TOKEN;
t->context_tree = cpi->common.fc.coef_probs [0] [1] [pt];
t->skip_eob_node = 0;
calculate_context(cpi, t);
++cpi->coef_counts [0] [1] [pt] [DCT_EOB_TOKEN];
++t;
*tp = t;
@@ -547,6 +575,7 @@ void stuff1st_order_buv
t->Token = DCT_EOB_TOKEN;
t->context_tree = cpi->common.fc.coef_probs [2] [0] [pt];
t->skip_eob_node = 0;
calculate_context(cpi, t);
++cpi->coef_counts[2] [0] [pt] [DCT_EOB_TOKEN];
++t;
*tp = t;

View File

@@ -29,6 +29,7 @@ typedef struct
short Extra;
unsigned char Token;
unsigned char skip_eob_node;
unsigned int context;
} TOKENEXTRA;
int rd_cost_mby(MACROBLOCKD *);