Merge remote branch 'origin/master' into experimental

Change-Id: I11cd10dba54d0f3f96640dadc97199e5733f1888
John Koleszar 2010-12-04 00:05:07 -05:00
commit 16724b7c93
7 changed files with 95 additions and 9 deletions

View File

@@ -81,6 +81,7 @@ typedef struct
 int errthresh;
 int rddiv;
 int rdmult;
+INT64 activity_sum;
 int mvcosts[2][MVvals+1];
 int *mvcost[2];

View File

@@ -375,6 +375,62 @@ void vp8cx_frame_init_quantizer(VP8_COMP *cpi)
 }
+
+/* activity_avg must be positive, or flat regions could get a zero weight
+ * (infinite lambda), which confounds analysis.
+ * This also avoids the need for divide by zero checks in
+ * vp8_activity_masking().
+ */
+#define VP8_ACTIVITY_AVG_MIN (64)
+
+/* This is used as a reference when computing the source variance for the
+ * purposes of activity masking.
+ * Eventually this should be replaced by custom no-reference routines,
+ * which will be faster.
+ */
+static const unsigned char VP8_VAR_OFFS[16]=
+{
+    128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128
+};
+
+unsigned int vp8_activity_masking(VP8_COMP *cpi, MACROBLOCK *x)
+{
+    unsigned int act;
+    unsigned int sse;
+    int sum;
+    unsigned int a;
+    unsigned int b;
+    unsigned int d;
+
+    /* TODO: This could also be done over smaller areas (8x8), but that would
+     * require extensive changes elsewhere, as lambda is assumed to be fixed
+     * over an entire MB in most of the code.
+     * Another option is to compute four 8x8 variances, and pick a single
+     * lambda using a non-linear combination (e.g., the smallest, or second
+     * smallest, etc.).
+     */
+    VARIANCE_INVOKE(&cpi->rtcd.variance, get16x16var)(x->src.y_buffer,
+        x->src.y_stride, VP8_VAR_OFFS, 0, &sse, &sum);
+
+    /* This requires a full 32 bits of precision. */
+    act = (sse<<8) - sum*sum;
+
+    /* Drop 4 to give us some headroom to work with. */
+    act = (act + 8) >> 4;
+
+    /* If the region is flat, lower the activity some more. */
+    if (act < 8<<12)
+        act = act < 5<<12 ? act : 5<<12;
+
+    /* TODO: For non-flat regions, edge regions should receive less masking
+     * than textured regions, but identifying edge regions quickly and
+     * reliably enough is still a subject of experimentation.
+     * This will be most noticable near edges with a complex shape (e.g.,
+     * text), but the 4x4 transform size should make this less of a problem
+     * than it would be for an 8x8 transform.
+     */
+
+    /* Apply the masking to the RD multiplier. */
+    a = act + 4*cpi->activity_avg;
+    b = 4*act + cpi->activity_avg;
+    x->rdmult = (unsigned int)(((INT64)x->rdmult*b + (a>>1))/a);
+
+    return act;
+}
+
 static
 void encode_mb_row(VP8_COMP *cpi,
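The new function measures how busy each macroblock is and adjusts its rate-distortion lambda accordingly: get16x16var() returns the sum of squared differences (sse) and the sum of differences (sum) between the source luma and the flat 128 reference in VP8_VAR_OFFS, so (sse<<8) - sum*sum is 256*256 times the per-pixel variance, and the >>4 leaves act at roughly variance<<12 (with nearly flat blocks pulled down to at most 5<<12). The final step scales x->rdmult by (4*act + activity_avg) / (act + 4*activity_avg), a ratio that always lies between 1/4 and 4, so busy blocks tolerate more distortion per bit and flat blocks less. Below is a minimal standalone sketch of just that scaling step; scale_rdmult and its test values are illustrative only, not part of the tree.

#include <stdint.h>
#include <stdio.h>

/* Rescale an RD multiplier by relative activity, mirroring the last step of
 * vp8_activity_masking().  The ratio (4*act + avg) / (act + 4*avg) always
 * lies in [1/4, 4], so rdmult never moves by more than a factor of four. */
static unsigned int scale_rdmult(unsigned int rdmult, unsigned int act,
                                 unsigned int avg)
{
    uint64_t a = (uint64_t)act + 4 * (uint64_t)avg;   /* denominator weight */
    uint64_t b = 4 * (uint64_t)act + avg;             /* numerator weight   */
    return (unsigned int)(((uint64_t)rdmult * b + (a >> 1)) / a);
}

int main(void)
{
    unsigned int avg = 90 << 12;                       /* frame average        */
    printf("%u\n", scale_rdmult(100, 90 << 12, avg));  /* as busy as avg: 100  */
    printf("%u\n", scale_rdmult(100, 720 << 12, avg)); /* 8x busier: 275       */
    printf("%u\n", scale_rdmult(100, 10 << 12, avg));  /* nearly flat: 35      */
    return 0;
}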
@@ -386,6 +442,7 @@ void encode_mb_row(VP8_COMP *cpi,
 int *segment_counts,
 int *totalrate)
 {
+INT64 activity_sum = 0;
 int i;
 int recon_yoffset, recon_uvoffset;
 int mb_col;
@@ -437,6 +494,11 @@ void encode_mb_row(VP8_COMP *cpi,
 xd->dst.v_buffer = cm->yv12_fb[dst_fb_idx].v_buffer + recon_uvoffset;
 xd->left_available = (mb_col != 0);
+
+x->rddiv = cpi->RDDIV;
+x->rdmult = cpi->RDMULT;
+activity_sum += vp8_activity_masking(cpi, x);
+
 // Is segmentation enabled
 // MB level adjutment to quantizer
 if (xd->segmentation_enabled)
@@ -543,6 +605,7 @@ void encode_mb_row(VP8_COMP *cpi,
 // this is to account for the border
 xd->mode_info_context++;
 x->partition_info++;
+x->activity_sum += activity_sum;
 }
@@ -659,8 +722,7 @@ void vp8_encode_frame(VP8_COMP *cpi)
 vp8_setup_block_ptrs(x);
-x->rddiv = cpi->RDDIV;
-x->rdmult = cpi->RDMULT;
+x->activity_sum = 0;
 #if 0
 // Experimental rd code
@@ -715,11 +777,12 @@ void vp8_encode_frame(VP8_COMP *cpi)
 else
 {
 #if CONFIG_MULTITHREAD
+int i;
+
 vp8cx_init_mbrthread_data(cpi, x, cpi->mb_row_ei, 1, cpi->encoding_thread_count);
 for (mb_row = 0; mb_row < cm->mb_rows; mb_row += (cpi->encoding_thread_count + 1))
 {
-int i;
 cpi->current_mb_col_main = -1;
 for (i = 0; i < cpi->encoding_thread_count; i++)
@@ -797,6 +860,11 @@ void vp8_encode_frame(VP8_COMP *cpi)
 totalrate += cpi->mb_row_ei[i].totalrate;
 }
+
+for (i = 0; i < cpi->encoding_thread_count; i++)
+{
+    x->activity_sum += cpi->mb_row_ei[i].mb.activity_sum;
+}
 #endif
 }
@@ -932,6 +1000,14 @@ void vp8_encode_frame(VP8_COMP *cpi)
 cpi->last_frame_distortion = cpi->frame_distortion;
 #endif
+
+/* Update the average activity for the next frame.
+ * This is feed-forward for now; it could also be saved in two-pass, or
+ * done during lookahead when that is eventually added.
+ */
+cpi->activity_avg = (unsigned int )(x->activity_sum/cpi->common.MBs);
+if (cpi->activity_avg < VP8_ACTIVITY_AVG_MIN)
+    cpi->activity_avg = VP8_ACTIVITY_AVG_MIN;
 }

 void vp8_setup_block_ptrs(MACROBLOCK *x)
 {
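Bookkeeping for that per-frame average: each call to vp8_activity_masking() returns the macroblock's activity, encode_mb_row() collects those returns in a local activity_sum and folds it into x->activity_sum at the end of the row, and in the threaded path vp8_encode_frame() adds each worker's mb.activity_sum back into the main MACROBLOCK before dividing by cpi->common.MBs. The clamp to VP8_ACTIVITY_AVG_MIN keeps the divisor used by the next frame positive even when the whole frame is flat. A small sketch of the end-of-frame step follows, with illustrative names (frame_activity_sum, num_mbs) standing in for x->activity_sum and cpi->common.MBs.

#include <stdint.h>

#define VP8_ACTIVITY_AVG_MIN (64)

/* Feed-forward update: the activity averaged over this frame becomes the
 * reference for the next one, never dropping below VP8_ACTIVITY_AVG_MIN so
 * the scaling in vp8_activity_masking() never divides by a zero average. */
unsigned int next_activity_avg(int64_t frame_activity_sum, int num_mbs)
{
    unsigned int avg = (unsigned int)(frame_activity_sum / num_mbs);
    return avg < VP8_ACTIVITY_AVG_MIN ? VP8_ACTIVITY_AVG_MIN : avg;
}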

View File

@@ -105,7 +105,7 @@ void vp8_encode_intra16x16mby(const VP8_ENCODER_RTCD *rtcd, MACROBLOCK *x)
 #if !(CONFIG_REALTIME_ONLY)
 #if 1
-if (x->optimize==2 ||(x->optimize && x->rddiv > 1))
+if (x->optimize)
     vp8_optimize_mby(x, rtcd);
 #endif

View File

@@ -635,7 +635,7 @@ void vp8_encode_inter16x16(const VP8_ENCODER_RTCD *rtcd, MACROBLOCK *x)
 vp8_quantize_mb(x);
 #if !(CONFIG_REALTIME_ONLY)
-if (x->optimize==2 ||(x->optimize && x->rddiv > 1))
+if (x->optimize)
     vp8_optimize_mb(x, rtcd);
 #endif

View File

@@ -61,6 +61,7 @@ THREAD_FUNCTION thread_encoding_proc(void *p_data)
 int recon_y_stride = cm->yv12_fb[ref_fb_idx].y_stride;
 int recon_uv_stride = cm->yv12_fb[ref_fb_idx].uv_stride;
 volatile int *last_row_current_mb_col;
+INT64 activity_sum = 0;

 if (ithread > 0)
     last_row_current_mb_col = &cpi->mb_row_ei[ithread-1].current_mb_col;
@@ -111,6 +112,11 @@ THREAD_FUNCTION thread_encoding_proc(void *p_data)
 xd->dst.v_buffer = cm->yv12_fb[dst_fb_idx].v_buffer + recon_uvoffset;
 xd->left_available = (mb_col != 0);
+
+x->rddiv = cpi->RDDIV;
+x->rdmult = cpi->RDMULT;
+activity_sum += vp8_activity_masking(cpi, x);
+
 // Is segmentation enabled
 // MB level adjutment to quantizer
 if (xd->segmentation_enabled)
@@ -197,6 +203,7 @@ THREAD_FUNCTION thread_encoding_proc(void *p_data)
 // this is to account for the border
 xd->mode_info_context++;
 x->partition_info++;
+x->activity_sum += activity_sum;

 x->src.y_buffer += 16 * x->src.y_stride * (cpi->encoding_thread_count + 1) - 16 * cm->mb_cols;
 x->src.u_buffer += 8 * x->src.uv_stride * (cpi->encoding_thread_count + 1) - 8 * cm->mb_cols;
@@ -240,8 +247,6 @@ static void setup_mbby_copy(MACROBLOCK *mbdst, MACROBLOCK *mbsrc)
 z->sadperbit16 = x->sadperbit16;
 z->sadperbit4 = x->sadperbit4;
 z->errthresh = x->errthresh;
-z->rddiv = x->rddiv;
-z->rdmult = x->rdmult;

 /*
 z->mv_col_min = x->mv_col_min;
@@ -392,8 +397,7 @@ void vp8cx_init_mbrthread_data(VP8_COMP *cpi,
 vp8_setup_block_ptrs(mb);
-mb->rddiv = cpi->RDDIV;
-mb->rdmult = cpi->RDMULT;
+mb->activity_sum = 0;
 mbd->left_context = &cm->left_context;
 mb->mvc = cm->fc.mvc;

View File

@@ -2205,6 +2205,8 @@ VP8_PTR vp8_create_compressor(VP8_CONFIG *oxcf)
 init_context_counters();
 #endif
+/*Initialize the feed-forward activity masking.*/
+cpi->activity_avg = 90<<12;
 cpi->frames_since_key = 8; // Give a sensible default for the first frame.
 cpi->key_frame_frequency = cpi->oxcf.key_freq;
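The 90<<12 seed follows from the fixed-point scale of the activity measure: for a 16x16 block of 256 pixels, (sse<<8) - sum*sum equals 256*256 times the per-pixel variance, and the >>4 in vp8_activity_masking() leaves act at about variance<<12, so the very first frame is masked as if the average macroblock had a luma variance of roughly 90. A quick self-contained check of that scale (the alternating test pattern below is purely illustrative):

#include <stdio.h>

int main(void)
{
    /* A 16x16 block alternating 128+d and 128-d has per-pixel variance d*d. */
    int d = 9, i, sum = 0;
    unsigned int sse = 0, act;

    for (i = 0; i < 256; i++)
    {
        int diff = (i & 1) ? d : -d;   /* pixel value minus the 128 reference */
        sum += diff;
        sse += diff * diff;
    }

    /* Same arithmetic as vp8_activity_masking(). */
    act = ((sse << 8) - sum * sum + 8) >> 4;
    printf("act = %u, (d*d)<<12 = %u\n", act, (unsigned int)(d * d) << 12);
    /* Both print 331776: act is the per-pixel variance scaled by 1<<12. */
    return 0;
}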

View File

@@ -321,6 +321,7 @@ typedef struct
 int mvcostmultiplier;
 int subseqblockweight;
 int errthresh;
+unsigned int activity_avg;

 int RDMULT;
 int RDDIV ;
@@ -676,6 +677,8 @@ void vp8_encode_frame(VP8_COMP *cpi);
 void vp8_pack_bitstream(VP8_COMP *cpi, unsigned char *dest, unsigned long *size);
+
+unsigned int vp8_activity_masking(VP8_COMP *cpi, MACROBLOCK *x);
 int rd_cost_intra_mb(MACROBLOCKD *x);
 void vp8_tokenize_mb(VP8_COMP *, MACROBLOCKD *, TOKENEXTRA **);