diff --git a/vp8/encoder/block.h b/vp8/encoder/block.h
index e94e54976..90b42c35c 100644
--- a/vp8/encoder/block.h
+++ b/vp8/encoder/block.h
@@ -81,6 +81,7 @@ typedef struct
     int errthresh;
     int rddiv;
     int rdmult;
+    INT64 activity_sum;
 
     int mvcosts[2][MVvals+1];
     int *mvcost[2];
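[Reviewer note] A quick bound, worked out here rather than taken from the patch, shows why the new accumulator is INT64 rather than int: per MB, vp8_activity_masking() below computes act = ((sse<<8) - sum*sum + 8) >> 4, and since sse for a 16x16 luma block measured against the all-128 reference is at most 256*128^2 = 2^22, act never exceeds 2^30 >> 4 = 2^26. A 32-bit sum could therefore overflow after as few as 2^32 / 2^26 = 64 worst-case macroblocks. A minimal sketch of the arithmetic (the function name is illustrative, not part of the patch):

    #include <stdint.h>

    /* Worst case for a single MB: act <= (sse << 8) >> 4, and sse is at
     * most 256 * 128 * 128 when every luma pixel differs from the all-128
     * reference by the maximum amount. */
    #define MAX_MB_ACTIVITY ((((int64_t)256 * 128 * 128) << 8) >> 4)  /* 2^26 */

    /* Upper bound on the frame-level sum; exceeds UINT32_MAX for any
     * frame with more than 64 macroblocks, hence the INT64 field. */
    static int64_t worst_case_activity_sum(int num_mbs)
    {
        return MAX_MB_ACTIVITY * num_mbs;
    }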
diff --git a/vp8/encoder/encodeframe.c b/vp8/encoder/encodeframe.c
index 2aac20b31..2002735d2 100644
--- a/vp8/encoder/encodeframe.c
+++ b/vp8/encoder/encodeframe.c
@@ -375,6 +375,62 @@ void vp8cx_frame_init_quantizer(VP8_COMP *cpi)
 }
 
+/* activity_avg must be positive, or flat regions could get a zero weight
+ * (infinite lambda), which confounds analysis.
+ * This also avoids the need for divide by zero checks in
+ * vp8_activity_masking().
+ */
+#define VP8_ACTIVITY_AVG_MIN (64)
+
+/* This is used as a reference when computing the source variance for the
+ * purposes of activity masking.
+ * Eventually this should be replaced by custom no-reference routines,
+ * which will be faster.
+ */
+static const unsigned char VP8_VAR_OFFS[16]=
+{
+    128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128
+};
+
+unsigned int vp8_activity_masking(VP8_COMP *cpi, MACROBLOCK *x)
+{
+    unsigned int act;
+    unsigned int sse;
+    int sum;
+    unsigned int a;
+    unsigned int b;
+    unsigned int d;
+    /* TODO: This could also be done over smaller areas (8x8), but that would
+     * require extensive changes elsewhere, as lambda is assumed to be fixed
+     * over an entire MB in most of the code.
+     * Another option is to compute four 8x8 variances, and pick a single
+     * lambda using a non-linear combination (e.g., the smallest, or second
+     * smallest, etc.).
+     */
+    VARIANCE_INVOKE(&cpi->rtcd.variance, get16x16var)(x->src.y_buffer,
+        x->src.y_stride, VP8_VAR_OFFS, 0, &sse, &sum);
+    /* This requires a full 32 bits of precision. */
+    act = (sse<<8) - sum*sum;
+    /* Drop 4 to give us some headroom to work with. */
+    act = (act + 8) >> 4;
+    /* If the region is flat, lower the activity some more. */
+    if (act < 8<<12)
+        act = act < 5<<12 ? act : 5<<12;
+    /* TODO: For non-flat regions, edge regions should receive less masking
+     * than textured regions, but identifying edge regions quickly and
+     * reliably enough is still a subject of experimentation.
+     * This will be most noticeable near edges with a complex shape (e.g.,
+     * text), but the 4x4 transform size should make this less of a problem
+     * than it would be for an 8x8 transform.
+     */
+    /* Apply the masking to the RD multiplier. */
+    a = act + 4*cpi->activity_avg;
+    b = 4*act + cpi->activity_avg;
+    x->rdmult = (unsigned int)(((INT64)x->rdmult*b + (a>>1))/a);
+
+    return act;
+}
+
 
 static
 void encode_mb_row(VP8_COMP *cpi,
@@ -386,6 +442,7 @@ void encode_mb_row(VP8_COMP *cpi,
                    int *segment_counts,
                    int *totalrate)
 {
+    INT64 activity_sum = 0;
     int i;
     int recon_yoffset, recon_uvoffset;
     int mb_col;
@@ -437,6 +494,11 @@ void encode_mb_row(VP8_COMP *cpi,
         xd->dst.v_buffer = cm->yv12_fb[dst_fb_idx].v_buffer + recon_uvoffset;
         xd->left_available = (mb_col != 0);
 
+        x->rddiv = cpi->RDDIV;
+        x->rdmult = cpi->RDMULT;
+
+        activity_sum += vp8_activity_masking(cpi, x);
+
         // Is segmentation enabled
         // MB level adjutment to quantizer
         if (xd->segmentation_enabled)
@@ -543,6 +605,7 @@ void encode_mb_row(VP8_COMP *cpi,
     // this is to account for the border
     xd->mode_info_context++;
     x->partition_info++;
+    x->activity_sum += activity_sum;
 }
@@ -659,8 +722,7 @@ void vp8_encode_frame(VP8_COMP *cpi)
     vp8_setup_block_ptrs(x);
 
-    x->rddiv = cpi->RDDIV;
-    x->rdmult = cpi->RDMULT;
+    x->activity_sum = 0;
 
 #if 0 // Experimental rd code
@@ -715,11 +777,12 @@ void vp8_encode_frame(VP8_COMP *cpi)
     else
     {
 #if CONFIG_MULTITHREAD
+        int i;
+
         vp8cx_init_mbrthread_data(cpi, x, cpi->mb_row_ei, 1, cpi->encoding_thread_count);
 
         for (mb_row = 0; mb_row < cm->mb_rows; mb_row += (cpi->encoding_thread_count + 1))
         {
-            int i;
             cpi->current_mb_col_main = -1;
 
             for (i = 0; i < cpi->encoding_thread_count; i++)
@@ -797,6 +860,11 @@ void vp8_encode_frame(VP8_COMP *cpi)
             totalrate += cpi->mb_row_ei[i].totalrate;
         }
 
+        for (i = 0; i < cpi->encoding_thread_count; i++)
+        {
+            x->activity_sum += cpi->mb_row_ei[i].mb.activity_sum;
+        }
+
 #endif
     }
@@ -932,6 +1000,14 @@ void vp8_encode_frame(VP8_COMP *cpi)
     cpi->last_frame_distortion = cpi->frame_distortion;
 #endif
 
+    /* Update the average activity for the next frame.
+     * This is feed-forward for now; it could also be saved in two-pass, or
+     * done during lookahead when that is eventually added.
+     */
+    cpi->activity_avg = (unsigned int)(x->activity_sum/cpi->common.MBs);
+    if (cpi->activity_avg < VP8_ACTIVITY_AVG_MIN)
+        cpi->activity_avg = VP8_ACTIVITY_AVG_MIN;
+
 }
 void vp8_setup_block_ptrs(MACROBLOCK *x)
 {
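[Reviewer note] For reference, act here is the 16x16 source variance in a fixed-point scale: (sse<<8) - sum*sum equals 256^2 times the per-pixel variance, and the >>4 leaves act at roughly variance<<12, which is why the flatness thresholds read 5<<12 and 8<<12 and the initial average is 90<<12. The multiplier update b/a = (4*act + avg)/(act + 4*avg) moves smoothly from 1/4 (act much smaller than avg) through 1 (act == avg) to 4 (act much larger than avg), so lambda is perturbed by at most a factor of 4 in either direction. A self-contained sketch of just that step (the helper name is mine, not the patch's):

    #include <stdint.h>

    /* Scale the RD multiplier by (4*act + avg)/(act + 4*avg), rounding the
     * division to nearest; mirrors the tail of vp8_activity_masking(). */
    static unsigned int masked_rdmult(unsigned int rdmult, unsigned int act,
                                      unsigned int activity_avg)
    {
        unsigned int a = act + 4 * activity_avg;  /* denominator */
        unsigned int b = 4 * act + activity_avg;  /* numerator */

        return (unsigned int)(((int64_t)rdmult * b + (a >> 1)) / a);
    }

For example, with the frame average at its 90<<12 seed, a flat MB clamped to 5<<12 gets rdmult scaled by (4*5 + 90)/(5 + 4*90) = 110/365, about 0.30, so flat regions see a substantially lower lambda and receive more bits.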
diff --git a/vp8/encoder/encodeintra.c b/vp8/encoder/encodeintra.c
index 1c72b90f1..0f327cec0 100644
--- a/vp8/encoder/encodeintra.c
+++ b/vp8/encoder/encodeintra.c
@@ -105,7 +105,7 @@ void vp8_encode_intra16x16mby(const VP8_ENCODER_RTCD *rtcd, MACROBLOCK *x)
 
 #if !(CONFIG_REALTIME_ONLY)
 #if 1
-    if (x->optimize==2 ||(x->optimize && x->rddiv > 1))
+    if (x->optimize)
         vp8_optimize_mby(x, rtcd);
 #endif
 
diff --git a/vp8/encoder/encodemb.c b/vp8/encoder/encodemb.c
index e9753ac48..f7faaa14a 100644
--- a/vp8/encoder/encodemb.c
+++ b/vp8/encoder/encodemb.c
@@ -635,7 +635,7 @@ void vp8_encode_inter16x16(const VP8_ENCODER_RTCD *rtcd, MACROBLOCK *x)
     vp8_quantize_mb(x);
 
 #if !(CONFIG_REALTIME_ONLY)
-    if (x->optimize==2 ||(x->optimize && x->rddiv > 1))
+    if (x->optimize)
         vp8_optimize_mb(x, rtcd);
 #endif
 
diff --git a/vp8/encoder/ethreading.c b/vp8/encoder/ethreading.c
index 962e74174..3646375ed 100644
--- a/vp8/encoder/ethreading.c
+++ b/vp8/encoder/ethreading.c
@@ -61,6 +61,7 @@ THREAD_FUNCTION thread_encoding_proc(void *p_data)
             int recon_y_stride = cm->yv12_fb[ref_fb_idx].y_stride;
             int recon_uv_stride = cm->yv12_fb[ref_fb_idx].uv_stride;
             volatile int *last_row_current_mb_col;
+            INT64 activity_sum = 0;
 
             if (ithread > 0)
                 last_row_current_mb_col = &cpi->mb_row_ei[ithread-1].current_mb_col;
@@ -111,6 +112,11 @@ THREAD_FUNCTION thread_encoding_proc(void *p_data)
                 xd->dst.v_buffer = cm->yv12_fb[dst_fb_idx].v_buffer + recon_uvoffset;
                 xd->left_available = (mb_col != 0);
 
+                x->rddiv = cpi->RDDIV;
+                x->rdmult = cpi->RDMULT;
+
+                activity_sum += vp8_activity_masking(cpi, x);
+
                 // Is segmentation enabled
                 // MB level adjutment to quantizer
                 if (xd->segmentation_enabled)
@@ -197,6 +203,7 @@ THREAD_FUNCTION thread_encoding_proc(void *p_data)
             // this is to account for the border
             xd->mode_info_context++;
             x->partition_info++;
+            x->activity_sum += activity_sum;
 
             x->src.y_buffer += 16 * x->src.y_stride * (cpi->encoding_thread_count + 1) - 16 * cm->mb_cols;
             x->src.u_buffer += 8 * x->src.uv_stride * (cpi->encoding_thread_count + 1) - 8 * cm->mb_cols;
@@ -240,8 +247,6 @@ static void setup_mbby_copy(MACROBLOCK *mbdst, MACROBLOCK *mbsrc)
     z->sadperbit16 = x->sadperbit16;
     z->sadperbit4 = x->sadperbit4;
     z->errthresh = x->errthresh;
-    z->rddiv = x->rddiv;
-    z->rdmult = x->rdmult;
 
     /*
     z->mv_col_min = x->mv_col_min;
@@ -392,8 +397,7 @@ void vp8cx_init_mbrthread_data(VP8_COMP *cpi,
         vp8_setup_block_ptrs(mb);
 
-        mb->rddiv = cpi->RDDIV;
-        mb->rdmult = cpi->RDMULT;
+        mb->activity_sum = 0;
 
         mbd->left_context = &cm->left_context;
         mb->mvc = cm->fc.mvc;
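[Reviewer note] The threading changes follow the same pattern as the row loop in encodeframe.c: rddiv/rdmult are now (re)set per MB before vp8_activity_masking() adjusts them, which is why the one-time copies in setup_mbby_copy() and vp8cx_init_mbrthread_data() go away, and each worker accumulates into its own mb_row_ei[i].mb.activity_sum so no locking is needed until vp8_encode_frame() folds the partials together. A generic sketch of that private-partial-sum-then-merge pattern, not libvpx code (all names illustrative):

    #include <pthread.h>
    #include <stdint.h>
    #include <stdio.h>

    #define NUM_THREADS 4
    #define NUM_ITEMS   1000

    /* Each worker owns a private partial sum, like the per-thread
     * MACROBLOCK copies in mb_row_ei[]. */
    struct worker
    {
        pthread_t tid;
        int first;
        int64_t partial_sum;
    };

    static void *accumulate(void *arg)
    {
        struct worker *w = (struct worker *)arg;
        int i;

        /* Stand-in for the per-MB vp8_activity_masking() results. */
        for (i = w->first; i < NUM_ITEMS; i += NUM_THREADS)
            w->partial_sum += i;

        return NULL;
    }

    int main(void)
    {
        struct worker workers[NUM_THREADS];
        int64_t total = 0;
        int i;

        for (i = 0; i < NUM_THREADS; i++)
        {
            workers[i].first = i;
            workers[i].partial_sum = 0;
            pthread_create(&workers[i].tid, NULL, accumulate, &workers[i]);
        }

        /* Merge only after joining, as vp8_encode_frame() does with
         * mb_row_ei[i].mb.activity_sum. */
        for (i = 0; i < NUM_THREADS; i++)
        {
            pthread_join(workers[i].tid, NULL);
            total += workers[i].partial_sum;
        }

        printf("total = %lld\n", (long long)total);  /* 499500 */
        return 0;
    }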
diff --git a/vp8/encoder/onyx_if.c b/vp8/encoder/onyx_if.c
index ea72de2de..46571156b 100644
--- a/vp8/encoder/onyx_if.c
+++ b/vp8/encoder/onyx_if.c
@@ -2205,6 +2205,8 @@ VP8_PTR vp8_create_compressor(VP8_CONFIG *oxcf)
     init_context_counters();
 #endif
 
+    /* Initialize the feed-forward activity masking. */
+    cpi->activity_avg = 90<<12;
     cpi->frames_since_key = 8;        // Give a sensible default for the first frame.
     cpi->key_frame_frequency = cpi->oxcf.key_freq;
diff --git a/vp8/encoder/onyx_int.h b/vp8/encoder/onyx_int.h
index 0eaba0017..1471c1dd0 100644
--- a/vp8/encoder/onyx_int.h
+++ b/vp8/encoder/onyx_int.h
@@ -321,6 +321,7 @@ typedef struct
     int mvcostmultiplier;
     int subseqblockweight;
     int errthresh;
+    unsigned int activity_avg;
 
     int RDMULT;
     int RDDIV ;
@@ -676,6 +677,8 @@ void vp8_encode_frame(VP8_COMP *cpi);
 
 void vp8_pack_bitstream(VP8_COMP *cpi, unsigned char *dest, unsigned long *size);
 
+unsigned int vp8_activity_masking(VP8_COMP *cpi, MACROBLOCK *x);
+
 int rd_cost_intra_mb(MACROBLOCKD *x);
 void vp8_tokenize_mb(VP8_COMP *, MACROBLOCKD *, TOKENEXTRA **);
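[Reviewer note] Putting the lifecycle together: activity_avg is seeded at 90<<12 (an assumed variance of 90 in the act fixed-point scale) when the compressor is created, and at the end of each encoded frame it is replaced by the mean per-MB activity, clamped from below so the next frame's masking never divides by zero. A compact sketch of that feed-forward update (the wrapper names are mine):

    #include <stdint.h>

    #define VP8_ACTIVITY_AVG_MIN (64)

    /* First frame has no history; start from an assumed variance of 90. */
    static unsigned int initial_activity_avg(void)
    {
        return 90 << 12;
    }

    /* End-of-frame update, as in vp8_encode_frame(): mean activity over
     * the frame, clamped so it stays strictly positive. */
    static unsigned int update_activity_avg(int64_t activity_sum, int num_mbs)
    {
        unsigned int avg = (unsigned int)(activity_sum / num_mbs);

        return avg < VP8_ACTIVITY_AVG_MIN ? VP8_ACTIVITY_AVG_MIN : avg;
    }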