Merge remote branch 'origin/master' into experimental

Change-Id: I11cd10dba54d0f3f96640dadc97199e5733f1888
John Koleszar 2010-12-04 00:05:07 -05:00
commit 16724b7c93
7 changed files with 95 additions and 9 deletions

vp8/encoder/block.h

@@ -81,6 +81,7 @@ typedef struct
     int errthresh;
     int rddiv;
     int rdmult;
+    INT64 activity_sum;

     int mvcosts[2][MVvals+1];
     int *mvcost[2];

vp8/encoder/encodeframe.c

@@ -375,6 +375,62 @@ void vp8cx_frame_init_quantizer(VP8_COMP *cpi)
 }

+/* activity_avg must be positive, or flat regions could get a zero weight
+ * (infinite lambda), which confounds analysis.
+ * This also avoids the need for divide by zero checks in
+ * vp8_activity_masking().
+ */
+#define VP8_ACTIVITY_AVG_MIN (64)
+
+/* This is used as a reference when computing the source variance for the
+ * purposes of activity masking.
+ * Eventually this should be replaced by custom no-reference routines,
+ * which will be faster.
+ */
+static const unsigned char VP8_VAR_OFFS[16]=
+{
+    128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128
+};
+
+unsigned int vp8_activity_masking(VP8_COMP *cpi, MACROBLOCK *x)
+{
+    unsigned int act;
+    unsigned int sse;
+    int sum;
+    unsigned int a;
+    unsigned int b;
+    unsigned int d;
+
+    /* TODO: This could also be done over smaller areas (8x8), but that would
+     * require extensive changes elsewhere, as lambda is assumed to be fixed
+     * over an entire MB in most of the code.
+     * Another option is to compute four 8x8 variances, and pick a single
+     * lambda using a non-linear combination (e.g., the smallest, or second
+     * smallest, etc.).
+     */
+    VARIANCE_INVOKE(&cpi->rtcd.variance, get16x16var)(x->src.y_buffer,
+                    x->src.y_stride, VP8_VAR_OFFS, 0, &sse, &sum);
+
+    /* This requires a full 32 bits of precision. */
+    act = (sse<<8) - sum*sum;
+
+    /* Drop 4 to give us some headroom to work with. */
+    act = (act + 8) >> 4;
+
+    /* If the region is flat, lower the activity some more. */
+    if (act < 8<<12)
+        act = act < 5<<12 ? act : 5<<12;
+
+    /* TODO: For non-flat regions, edge regions should receive less masking
+     * than textured regions, but identifying edge regions quickly and
+     * reliably enough is still a subject of experimentation.
+     * This will be most noticable near edges with a complex shape (e.g.,
+     * text), but the 4x4 transform size should make this less of a problem
+     * than it would be for an 8x8 transform.
+     */
+
+    /* Apply the masking to the RD multiplier. */
+    a = act + 4*cpi->activity_avg;
+    b = 4*act + cpi->activity_avg;
+    x->rdmult = (unsigned int)(((INT64)x->rdmult*b + (a>>1))/a);
+
+    return act;
+}
+
 static
 void encode_mb_row(VP8_COMP *cpi,
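
Note on the arithmetic in vp8_activity_masking(): for a 16x16 block, (sse<<8) - sum*sum equals 256*(sse - sum*sum/256), i.e. the per-pixel variance against the flat 128 reference scaled by 2^16, and the subsequent >>4 leaves act in Q12 fixed point (act == variance<<12, up to rounding). The 5<<12 and 8<<12 clamps, and the 90<<12 seed set in vp8_create_compressor() below, are therefore per-pixel variances in Q12. Since b/a = (4*act + avg)/(act + 4*avg) lies strictly between 1/4 and 4, a macroblock's rdmult (lambda) never moves more than a factor of four from the frame baseline. A minimal standalone sketch of the computation (not libvpx code: get16x16var_c() is a hypothetical stand-in for the VARIANCE_INVOKE RTCD call, and the rdmult value is an arbitrary example):

#include <stdio.h>

/* Hypothetical C stand-in for the RTCD get16x16var call: sum and sum of
 * squares of the differences against a reference row; a ref_stride of 0
 * reuses the same flat 16-byte row for every line, as the encoder does
 * with VP8_VAR_OFFS. */
static void get16x16var_c(const unsigned char *src, int src_stride,
                          const unsigned char *ref, int ref_stride,
                          unsigned int *sse, int *sum)
{
    int r, c;
    *sse = 0;
    *sum = 0;
    for (r = 0; r < 16; r++)
        for (c = 0; c < 16; c++)
        {
            int d = src[r * src_stride + c] - ref[r * ref_stride + c];
            *sum += d;
            *sse += d * d;
        }
}

int main(void)
{
    static const unsigned char var_offs[16] =
    {
        128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128
    };
    unsigned char src[16 * 16];
    unsigned int sse, act, a, b;
    int sum, i;
    unsigned int activity_avg = 90 << 12;  /* the feed-forward seed */
    long long rdmult = 300;                /* arbitrary example value */

    /* A mildly textured block: +/-8 around 128, per-pixel variance 64. */
    for (i = 0; i < 256; i++)
        src[i] = (unsigned char)((i & 1) ? 136 : 120);

    get16x16var_c(src, 16, var_offs, 0, &sse, &sum);

    act = (sse << 8) - sum * sum;      /* per-pixel variance in Q16 */
    act = (act + 8) >> 4;              /* now Q12: 64<<12 for this block */
    if (act < 8 << 12)                 /* flat-region clamp, inactive here */
        act = act < 5 << 12 ? act : 5 << 12;

    a = act + 4 * activity_avg;
    b = 4 * act + activity_avg;
    printf("act=%u (variance %u), rdmult %lld -> %lld\n",
           act, act >> 12, rdmult, (rdmult * b + (a >> 1)) / a);
    /* Prints: act=262144 (variance 64), rdmult 300 -> 245
     * The block is less active than the assumed average of 90, so its
     * lambda drops and it receives relatively more bits. */
    return 0;
}
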
@@ -386,6 +442,7 @@ void encode_mb_row(VP8_COMP *cpi,
                    int *segment_counts,
                    int *totalrate)
 {
+    INT64 activity_sum = 0;
     int i;
     int recon_yoffset, recon_uvoffset;
     int mb_col;
@@ -437,6 +494,11 @@ void encode_mb_row(VP8_COMP *cpi,
         xd->dst.v_buffer = cm->yv12_fb[dst_fb_idx].v_buffer + recon_uvoffset;
         xd->left_available = (mb_col != 0);

+        x->rddiv = cpi->RDDIV;
+        x->rdmult = cpi->RDMULT;
+
+        activity_sum += vp8_activity_masking(cpi, x);
+
         // Is segmentation enabled
         // MB level adjutment to quantizer
         if (xd->segmentation_enabled)
@@ -543,6 +605,7 @@ void encode_mb_row(VP8_COMP *cpi,
     // this is to account for the border
     xd->mode_info_context++;
     x->partition_info++;
+    x->activity_sum += activity_sum;
 }
@@ -659,8 +722,7 @@ void vp8_encode_frame(VP8_COMP *cpi)
     vp8_setup_block_ptrs(x);

-    x->rddiv = cpi->RDDIV;
-    x->rdmult = cpi->RDMULT;
+    x->activity_sum = 0;

 #if 0
     // Experimental rd code
@@ -715,11 +777,12 @@ void vp8_encode_frame(VP8_COMP *cpi)
     else
     {
 #if CONFIG_MULTITHREAD
+        int i;
+
         vp8cx_init_mbrthread_data(cpi, x, cpi->mb_row_ei, 1, cpi->encoding_thread_count);

         for (mb_row = 0; mb_row < cm->mb_rows; mb_row += (cpi->encoding_thread_count + 1))
         {
-            int i;
             cpi->current_mb_col_main = -1;

             for (i = 0; i < cpi->encoding_thread_count; i++)
@@ -797,6 +860,11 @@ void vp8_encode_frame(VP8_COMP *cpi)
             totalrate += cpi->mb_row_ei[i].totalrate;
         }

+        for (i = 0; i < cpi->encoding_thread_count; i++)
+        {
+            x->activity_sum += cpi->mb_row_ei[i].mb.activity_sum;
+        }
+
 #endif
     }
@@ -932,6 +1000,14 @@ void vp8_encode_frame(VP8_COMP *cpi)
     cpi->last_frame_distortion = cpi->frame_distortion;
 #endif

+    /* Update the average activity for the next frame.
+     * This is feed-forward for now; it could also be saved in two-pass, or
+     * done during lookahead when that is eventually added.
+     */
+    cpi->activity_avg = (unsigned int )(x->activity_sum/cpi->common.MBs);
+    if (cpi->activity_avg < VP8_ACTIVITY_AVG_MIN)
+        cpi->activity_avg = VP8_ACTIVITY_AVG_MIN;
 }

 void vp8_setup_block_ptrs(MACROBLOCK *x)
 {
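
Note on the INT64 accumulators: a single macroblock's act can reach roughly 2^26 (the per-pixel variance against the flat reference is bounded by 128^2 = 2^14, carried in Q12), so a frame of several thousand macroblocks, e.g. 8160 at 1080p, can push the per-frame total past 2^32. The per-row and per-thread partial sums therefore use INT64, and the final average is floored at VP8_ACTIVITY_AVG_MIN so the divisor a in vp8_activity_masking() stays safely away from zero even for an entirely flat frame.
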

vp8/encoder/encodeintra.c

@@ -105,7 +105,7 @@ void vp8_encode_intra16x16mby(const VP8_ENCODER_RTCD *rtcd, MACROBLOCK *x)
 #if !(CONFIG_REALTIME_ONLY)
 #if 1
-    if (x->optimize==2 ||(x->optimize && x->rddiv > 1))
+    if (x->optimize)
         vp8_optimize_mby(x, rtcd);
 #endif

vp8/encoder/encodemb.c

@@ -635,7 +635,7 @@ void vp8_encode_inter16x16(const VP8_ENCODER_RTCD *rtcd, MACROBLOCK *x)
     vp8_quantize_mb(x);

 #if !(CONFIG_REALTIME_ONLY)
-    if (x->optimize==2 ||(x->optimize && x->rddiv > 1))
+    if (x->optimize)
         vp8_optimize_mb(x, rtcd);
 #endif
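
Note: both trellis gates (the intra path in encodeintra.c above and the inter path here) drop the x->rddiv > 1 test. Presumably this is because rddiv is now reloaded from cpi->RDDIV for every macroblock while vp8_activity_masking() rescales rdmult per MB, so rddiv no longer serves as a proxy for an RD-mode encode and x->optimize alone decides whether coefficient optimization runs.
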

vp8/encoder/ethreading.c

@@ -61,6 +61,7 @@ THREAD_FUNCTION thread_encoding_proc(void *p_data)
         int recon_y_stride = cm->yv12_fb[ref_fb_idx].y_stride;
         int recon_uv_stride = cm->yv12_fb[ref_fb_idx].uv_stride;
         volatile int *last_row_current_mb_col;
+        INT64 activity_sum = 0;

         if (ithread > 0)
             last_row_current_mb_col = &cpi->mb_row_ei[ithread-1].current_mb_col;
@@ -111,6 +112,11 @@ THREAD_FUNCTION thread_encoding_proc(void *p_data)
             xd->dst.v_buffer = cm->yv12_fb[dst_fb_idx].v_buffer + recon_uvoffset;
             xd->left_available = (mb_col != 0);

+            x->rddiv = cpi->RDDIV;
+            x->rdmult = cpi->RDMULT;
+
+            activity_sum += vp8_activity_masking(cpi, x);
+
             // Is segmentation enabled
             // MB level adjutment to quantizer
             if (xd->segmentation_enabled)
@@ -197,6 +203,7 @@ THREAD_FUNCTION thread_encoding_proc(void *p_data)
             // this is to account for the border
             xd->mode_info_context++;
             x->partition_info++;
+            x->activity_sum += activity_sum;

             x->src.y_buffer += 16 * x->src.y_stride * (cpi->encoding_thread_count + 1) - 16 * cm->mb_cols;
             x->src.u_buffer += 8 * x->src.uv_stride * (cpi->encoding_thread_count + 1) - 8 * cm->mb_cols;
@@ -240,8 +247,6 @@ static void setup_mbby_copy(MACROBLOCK *mbdst, MACROBLOCK *mbsrc)
     z->sadperbit16 = x->sadperbit16;
     z->sadperbit4 = x->sadperbit4;
     z->errthresh = x->errthresh;
-    z->rddiv = x->rddiv;
-    z->rdmult = x->rdmult;

     /*
     z->mv_col_min = x->mv_col_min;
@@ -392,8 +397,7 @@ void vp8cx_init_mbrthread_data(VP8_COMP *cpi,
         vp8_setup_block_ptrs(mb);

-        mb->rddiv = cpi->RDDIV;
-        mb->rdmult = cpi->RDMULT;
+        mb->activity_sum = 0;

         mbd->left_context = &cm->left_context;
         mb->mvc = cm->fc.mvc;
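
Note: the threading changes mirror the single-threaded path. setup_mbby_copy() no longer copies rddiv/rdmult into each worker MACROBLOCK, since the row loop now re-initializes both for every macroblock before masking, and vp8cx_init_mbrthread_data() instead zeroes each worker's activity_sum; the per-thread totals are summed back into the main context by the loop added in vp8_encode_frame().
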

vp8/encoder/onyx_if.c

@@ -2205,6 +2205,8 @@ VP8_PTR vp8_create_compressor(VP8_CONFIG *oxcf)
     init_context_counters();
 #endif

+    /*Initialize the feed-forward activity masking.*/
+    cpi->activity_avg = 90<<12;

     cpi->frames_since_key = 8; // Give a sensible default for the first frame.
     cpi->key_frame_frequency = cpi->oxcf.key_freq;
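
Note: in act's Q12 scale, 90<<12 seeds activity_avg at a per-pixel variance of 90, a moderately textured default, so the very first frame is masked against a plausible baseline; every subsequent frame replaces it with the measured average computed at the end of vp8_encode_frame(), floored at VP8_ACTIVITY_AVG_MIN.
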

vp8/encoder/onyx_int.h

@@ -321,6 +321,7 @@ typedef struct
     int mvcostmultiplier;
     int subseqblockweight;
     int errthresh;
+    unsigned int activity_avg;

     int RDMULT;
     int RDDIV ;
@@ -676,6 +677,8 @@ void vp8_encode_frame(VP8_COMP *cpi);
 void vp8_pack_bitstream(VP8_COMP *cpi, unsigned char *dest, unsigned long *size);

+unsigned int vp8_activity_masking(VP8_COMP *cpi, MACROBLOCK *x);
+
 int rd_cost_intra_mb(MACROBLOCKD *x);
 void vp8_tokenize_mb(VP8_COMP *, MACROBLOCKD *, TOKENEXTRA **);