Merge remote branch 'origin/master' into experimental

Change-Id: I11cd10dba54d0f3f96640dadc97199e5733f1888
John Koleszar 2010-12-04 00:05:07 -05:00
commit 16724b7c93
7 changed files with 95 additions and 9 deletions

View File

@@ -81,6 +81,7 @@ typedef struct
 int errthresh;
 int rddiv;
 int rdmult;
+INT64 activity_sum;
 int mvcosts[2][MVvals+1];
 int *mvcost[2];

View File

@@ -375,6 +375,62 @@ void vp8cx_frame_init_quantizer(VP8_COMP *cpi)
 }
+
+/* activity_avg must be positive, or flat regions could get a zero weight
+ * (infinite lambda), which confounds analysis.
+ * This also avoids the need for divide by zero checks in
+ * vp8_activity_masking().
+ */
+#define VP8_ACTIVITY_AVG_MIN (64)
+
+/* This is used as a reference when computing the source variance for the
+ * purposes of activity masking.
+ * Eventually this should be replaced by custom no-reference routines,
+ * which will be faster.
+ */
+static const unsigned char VP8_VAR_OFFS[16]=
+{
+    128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128
+};
+
+unsigned int vp8_activity_masking(VP8_COMP *cpi, MACROBLOCK *x)
+{
+    unsigned int act;
+    unsigned int sse;
+    int sum;
+    unsigned int a;
+    unsigned int b;
+    unsigned int d;
+
+    /* TODO: This could also be done over smaller areas (8x8), but that would
+     * require extensive changes elsewhere, as lambda is assumed to be fixed
+     * over an entire MB in most of the code.
+     * Another option is to compute four 8x8 variances, and pick a single
+     * lambda using a non-linear combination (e.g., the smallest, or second
+     * smallest, etc.).
+     */
+    VARIANCE_INVOKE(&cpi->rtcd.variance, get16x16var)(x->src.y_buffer,
+        x->src.y_stride, VP8_VAR_OFFS, 0, &sse, &sum);
+
+    /* This requires a full 32 bits of precision. */
+    act = (sse<<8) - sum*sum;
+
+    /* Drop 4 to give us some headroom to work with. */
+    act = (act + 8) >> 4;
+
+    /* If the region is flat, lower the activity some more. */
+    if (act < 8<<12)
+        act = act < 5<<12 ? act : 5<<12;
+
+    /* TODO: For non-flat regions, edge regions should receive less masking
+     * than textured regions, but identifying edge regions quickly and
+     * reliably enough is still a subject of experimentation.
+     * This will be most noticable near edges with a complex shape (e.g.,
+     * text), but the 4x4 transform size should make this less of a problem
+     * than it would be for an 8x8 transform.
+     */
+
+    /* Apply the masking to the RD multiplier. */
+    a = act + 4*cpi->activity_avg;
+    b = 4*act + cpi->activity_avg;
+    x->rdmult = (unsigned int)(((INT64)x->rdmult*b + (a>>1))/a);
+
+    return act;
+}
+
 static
 void encode_mb_row(VP8_COMP *cpi,
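The new function measures how busy each macroblock is and adjusts its rate-distortion lambda accordingly: get16x16var() returns the sum of squared differences (sse) and the sum of differences (sum) between the source luma and the flat 128 reference in VP8_VAR_OFFS, so (sse<<8) - sum*sum is 256*256 times the per-pixel variance, and the >>4 leaves act at roughly variance<<12 (with nearly flat blocks pulled down to at most 5<<12). The final step scales x->rdmult by (4*act + activity_avg) / (act + 4*activity_avg), a ratio that always lies between 1/4 and 4, so busy blocks tolerate more distortion per bit and flat blocks less. Below is a minimal standalone sketch of just that scaling step; scale_rdmult and its test values are illustrative only, not part of the tree.

#include <stdint.h>
#include <stdio.h>

/* Rescale an RD multiplier by relative activity, mirroring the last step of
 * vp8_activity_masking().  The ratio (4*act + avg) / (act + 4*avg) always
 * lies in [1/4, 4], so rdmult never moves by more than a factor of four. */
static unsigned int scale_rdmult(unsigned int rdmult, unsigned int act,
                                 unsigned int avg)
{
    uint64_t a = (uint64_t)act + 4 * (uint64_t)avg;   /* denominator weight */
    uint64_t b = 4 * (uint64_t)act + avg;             /* numerator weight   */
    return (unsigned int)(((uint64_t)rdmult * b + (a >> 1)) / a);
}

int main(void)
{
    unsigned int avg = 90 << 12;                       /* frame average        */
    printf("%u\n", scale_rdmult(100, 90 << 12, avg));  /* as busy as avg: 100  */
    printf("%u\n", scale_rdmult(100, 720 << 12, avg)); /* 8x busier: 275       */
    printf("%u\n", scale_rdmult(100, 10 << 12, avg));  /* nearly flat: 35      */
    return 0;
}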
@@ -386,6 +442,7 @@ void encode_mb_row(VP8_COMP *cpi,
 int *segment_counts,
 int *totalrate)
 {
+INT64 activity_sum = 0;
 int i;
 int recon_yoffset, recon_uvoffset;
 int mb_col;
@@ -437,6 +494,11 @@ void encode_mb_row(VP8_COMP *cpi,
 xd->dst.v_buffer = cm->yv12_fb[dst_fb_idx].v_buffer + recon_uvoffset;
 xd->left_available = (mb_col != 0);
+
+x->rddiv = cpi->RDDIV;
+x->rdmult = cpi->RDMULT;
+activity_sum += vp8_activity_masking(cpi, x);
+
 // Is segmentation enabled
 // MB level adjutment to quantizer
 if (xd->segmentation_enabled)
@@ -543,6 +605,7 @@ void encode_mb_row(VP8_COMP *cpi,
 // this is to account for the border
 xd->mode_info_context++;
 x->partition_info++;
+x->activity_sum += activity_sum;
 }
@@ -659,8 +722,7 @@ void vp8_encode_frame(VP8_COMP *cpi)
 vp8_setup_block_ptrs(x);
-x->rddiv = cpi->RDDIV;
-x->rdmult = cpi->RDMULT;
+x->activity_sum = 0;
 #if 0
 // Experimental rd code
@@ -715,11 +777,12 @@ void vp8_encode_frame(VP8_COMP *cpi)
 else
 {
 #if CONFIG_MULTITHREAD
+int i;
+
 vp8cx_init_mbrthread_data(cpi, x, cpi->mb_row_ei, 1, cpi->encoding_thread_count);
 for (mb_row = 0; mb_row < cm->mb_rows; mb_row += (cpi->encoding_thread_count + 1))
 {
-int i;
 cpi->current_mb_col_main = -1;
 for (i = 0; i < cpi->encoding_thread_count; i++)
@@ -797,6 +860,11 @@ void vp8_encode_frame(VP8_COMP *cpi)
 totalrate += cpi->mb_row_ei[i].totalrate;
 }
+
+for (i = 0; i < cpi->encoding_thread_count; i++)
+{
+    x->activity_sum += cpi->mb_row_ei[i].mb.activity_sum;
+}
 #endif
 }
@@ -932,6 +1000,14 @@ void vp8_encode_frame(VP8_COMP *cpi)
 cpi->last_frame_distortion = cpi->frame_distortion;
 #endif
+
+/* Update the average activity for the next frame.
+ * This is feed-forward for now; it could also be saved in two-pass, or
+ * done during lookahead when that is eventually added.
+ */
+cpi->activity_avg = (unsigned int )(x->activity_sum/cpi->common.MBs);
+if (cpi->activity_avg < VP8_ACTIVITY_AVG_MIN)
+    cpi->activity_avg = VP8_ACTIVITY_AVG_MIN;
 }

 void vp8_setup_block_ptrs(MACROBLOCK *x)
 {
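Bookkeeping for that per-frame average: each call to vp8_activity_masking() returns the macroblock's activity, encode_mb_row() collects those returns in a local activity_sum and folds it into x->activity_sum at the end of the row, and in the threaded path vp8_encode_frame() adds each worker's mb.activity_sum back into the main MACROBLOCK before dividing by cpi->common.MBs. The clamp to VP8_ACTIVITY_AVG_MIN keeps the divisor used by the next frame positive even when the whole frame is flat. A small sketch of the end-of-frame step follows, with illustrative names (frame_activity_sum, num_mbs) standing in for x->activity_sum and cpi->common.MBs.

#include <stdint.h>

#define VP8_ACTIVITY_AVG_MIN (64)

/* Feed-forward update: the activity averaged over this frame becomes the
 * reference for the next one, never dropping below VP8_ACTIVITY_AVG_MIN so
 * the scaling in vp8_activity_masking() never divides by a zero average. */
unsigned int next_activity_avg(int64_t frame_activity_sum, int num_mbs)
{
    unsigned int avg = (unsigned int)(frame_activity_sum / num_mbs);
    return avg < VP8_ACTIVITY_AVG_MIN ? VP8_ACTIVITY_AVG_MIN : avg;
}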

View File

@@ -105,7 +105,7 @@ void vp8_encode_intra16x16mby(const VP8_ENCODER_RTCD *rtcd, MACROBLOCK *x)
 #if !(CONFIG_REALTIME_ONLY)
 #if 1
-if (x->optimize==2 ||(x->optimize && x->rddiv > 1))
+if (x->optimize)
     vp8_optimize_mby(x, rtcd);
 #endif

View File

@@ -635,7 +635,7 @@ void vp8_encode_inter16x16(const VP8_ENCODER_RTCD *rtcd, MACROBLOCK *x)
 vp8_quantize_mb(x);
 #if !(CONFIG_REALTIME_ONLY)
-if (x->optimize==2 ||(x->optimize && x->rddiv > 1))
+if (x->optimize)
     vp8_optimize_mb(x, rtcd);
 #endif

View File

@@ -61,6 +61,7 @@ THREAD_FUNCTION thread_encoding_proc(void *p_data)
 int recon_y_stride = cm->yv12_fb[ref_fb_idx].y_stride;
 int recon_uv_stride = cm->yv12_fb[ref_fb_idx].uv_stride;
 volatile int *last_row_current_mb_col;
+INT64 activity_sum = 0;

 if (ithread > 0)
     last_row_current_mb_col = &cpi->mb_row_ei[ithread-1].current_mb_col;
@@ -111,6 +112,11 @@ THREAD_FUNCTION thread_encoding_proc(void *p_data)
 xd->dst.v_buffer = cm->yv12_fb[dst_fb_idx].v_buffer + recon_uvoffset;
 xd->left_available = (mb_col != 0);
+
+x->rddiv = cpi->RDDIV;
+x->rdmult = cpi->RDMULT;
+activity_sum += vp8_activity_masking(cpi, x);
+
 // Is segmentation enabled
 // MB level adjutment to quantizer
 if (xd->segmentation_enabled)
@@ -197,6 +203,7 @@ THREAD_FUNCTION thread_encoding_proc(void *p_data)
 // this is to account for the border
 xd->mode_info_context++;
 x->partition_info++;
+x->activity_sum += activity_sum;

 x->src.y_buffer += 16 * x->src.y_stride * (cpi->encoding_thread_count + 1) - 16 * cm->mb_cols;
 x->src.u_buffer += 8 * x->src.uv_stride * (cpi->encoding_thread_count + 1) - 8 * cm->mb_cols;
@@ -240,8 +247,6 @@ static void setup_mbby_copy(MACROBLOCK *mbdst, MACROBLOCK *mbsrc)
 z->sadperbit16 = x->sadperbit16;
 z->sadperbit4 = x->sadperbit4;
 z->errthresh = x->errthresh;
-z->rddiv = x->rddiv;
-z->rdmult = x->rdmult;

 /*
 z->mv_col_min = x->mv_col_min;
@@ -392,8 +397,7 @@ void vp8cx_init_mbrthread_data(VP8_COMP *cpi,
 vp8_setup_block_ptrs(mb);
-mb->rddiv = cpi->RDDIV;
-mb->rdmult = cpi->RDMULT;
+mb->activity_sum = 0;
 mbd->left_context = &cm->left_context;
 mb->mvc = cm->fc.mvc;

View File

@@ -2205,6 +2205,8 @@ VP8_PTR vp8_create_compressor(VP8_CONFIG *oxcf)
 init_context_counters();
 #endif
+/*Initialize the feed-forward activity masking.*/
+cpi->activity_avg = 90<<12;
 cpi->frames_since_key = 8; // Give a sensible default for the first frame.
 cpi->key_frame_frequency = cpi->oxcf.key_freq;
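The 90<<12 seed follows from the fixed-point scale of the activity measure: for a 16x16 block of 256 pixels, (sse<<8) - sum*sum equals 256*256 times the per-pixel variance, and the >>4 in vp8_activity_masking() leaves act at about variance<<12, so the very first frame is masked as if the average macroblock had a luma variance of roughly 90. A quick self-contained check of that scale (the alternating test pattern below is purely illustrative):

#include <stdio.h>

int main(void)
{
    /* A 16x16 block alternating 128+d and 128-d has per-pixel variance d*d. */
    int d = 9, i, sum = 0;
    unsigned int sse = 0, act;

    for (i = 0; i < 256; i++)
    {
        int diff = (i & 1) ? d : -d;   /* pixel value minus the 128 reference */
        sum += diff;
        sse += diff * diff;
    }

    /* Same arithmetic as vp8_activity_masking(). */
    act = ((sse << 8) - sum * sum + 8) >> 4;
    printf("act = %u, (d*d)<<12 = %u\n", act, (unsigned int)(d * d) << 12);
    /* Both print 331776: act is the per-pixel variance scaled by 1<<12. */
    return 0;
}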

View File

@@ -321,6 +321,7 @@ typedef struct
 int mvcostmultiplier;
 int subseqblockweight;
 int errthresh;
+unsigned int activity_avg;

 int RDMULT;
 int RDDIV ;
@@ -676,6 +677,8 @@ void vp8_encode_frame(VP8_COMP *cpi);
 void vp8_pack_bitstream(VP8_COMP *cpi, unsigned char *dest, unsigned long *size);
+
+unsigned int vp8_activity_masking(VP8_COMP *cpi, MACROBLOCK *x);
 int rd_cost_intra_mb(MACROBLOCKD *x);
 void vp8_tokenize_mb(VP8_COMP *, MACROBLOCKD *, TOKENEXTRA **);