Merge "Performance improvement of first pass"

This commit is contained in:
Scott LaVarnway 2011-01-31 13:02:23 -08:00 committed by Code Review
commit 60fde4d342

View File

@ -8,7 +8,6 @@
* be found in the AUTHORS file in the root of the source tree. * be found in the AUTHORS file in the root of the source tree.
*/ */
#include "math.h" #include "math.h"
#include "limits.h" #include "limits.h"
#include "block.h" #include "block.h"
@ -178,40 +177,68 @@ static double calculate_modified_err(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame)
return modified_err; return modified_err;
} }
static const double weight_table[256] = {
0.020000, 0.020000, 0.020000, 0.020000, 0.020000, 0.020000, 0.020000, 0.020000,
0.020000, 0.020000, 0.020000, 0.020000, 0.020000, 0.020000, 0.020000, 0.020000,
0.020000, 0.020000, 0.020000, 0.020000, 0.020000, 0.020000, 0.020000, 0.020000,
0.020000, 0.020000, 0.020000, 0.020000, 0.020000, 0.020000, 0.020000, 0.020000,
0.020000, 0.031250, 0.062500, 0.093750, 0.125000, 0.156250, 0.187500, 0.218750,
0.250000, 0.281250, 0.312500, 0.343750, 0.375000, 0.406250, 0.437500, 0.468750,
0.500000, 0.531250, 0.562500, 0.593750, 0.625000, 0.656250, 0.687500, 0.718750,
0.750000, 0.781250, 0.812500, 0.843750, 0.875000, 0.906250, 0.937500, 0.968750,
1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000,
1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000,
1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000,
1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000,
1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000,
1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000,
1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000,
1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000,
1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000,
1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000,
1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000,
1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000,
1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000,
1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000,
1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000,
1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000,
1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000,
1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000,
1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000,
1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000,
1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000,
1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000,
1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000,
1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000
};
double vp8_simple_weight(YV12_BUFFER_CONFIG *source) double vp8_simple_weight(YV12_BUFFER_CONFIG *source)
{ {
int i, j; int i, j;
unsigned char *src = source->y_buffer; unsigned char *src = source->y_buffer;
unsigned char value;
double sum_weights = 0.0; double sum_weights = 0.0;
double Weight;
// Loop throught the Y plane raw examining levels and creating a weight for the image // Loop throught the Y plane raw examining levels and creating a weight for the image
for (i = 0; i < source->y_height; i++) i = source->y_height;
do
{ {
for (j = 0; j < source->y_width; j++) j = source->y_width;
do
{ {
value = src[j]; sum_weights += weight_table[ *src];
src++;
if (value >= 64) }while(--j);
Weight = 1.0; src -= source->y_width;
else if (value > 32)
Weight = (value - 32.0f) / 32.0f;
else
Weight = 0.02;
sum_weights += Weight;
}
src += source->y_stride; src += source->y_stride;
} }while(--i);
sum_weights /= (source->y_height * source->y_width); sum_weights /= (source->y_height * source->y_width);
return sum_weights; return sum_weights;
} }
// This function returns the current per frame maximum bitrate target // This function returns the current per frame maximum bitrate target
int frame_max_bits(VP8_COMP *cpi) int frame_max_bits(VP8_COMP *cpi)
{ {
@ -440,7 +467,6 @@ void vp8_end_first_pass(VP8_COMP *cpi)
vp8_output_stats(cpi, cpi->output_pkt_list, cpi->total_stats); vp8_output_stats(cpi, cpi->output_pkt_list, cpi->total_stats);
} }
void vp8_zz_motion_search( VP8_COMP *cpi, MACROBLOCK * x, YV12_BUFFER_CONFIG * recon_buffer, int * best_motion_err, int recon_yoffset ) void vp8_zz_motion_search( VP8_COMP *cpi, MACROBLOCK * x, YV12_BUFFER_CONFIG * recon_buffer, int * best_motion_err, int recon_yoffset )
{ {
MACROBLOCKD * const xd = & x->e_mbd; MACROBLOCKD * const xd = & x->e_mbd;
@ -460,7 +486,6 @@ void vp8_zz_motion_search( VP8_COMP *cpi, MACROBLOCK * x, YV12_BUFFER_CONFIG * r
VARIANCE_INVOKE(IF_RTCD(&cpi->rtcd.variance), mse16x16) ( src_ptr, src_stride, ref_ptr, ref_stride, (unsigned int *)(best_motion_err)); VARIANCE_INVOKE(IF_RTCD(&cpi->rtcd.variance), mse16x16) ( src_ptr, src_stride, ref_ptr, ref_stride, (unsigned int *)(best_motion_err));
} }
void vp8_first_pass_motion_search(VP8_COMP *cpi, MACROBLOCK *x, MV *ref_mv, MV *best_mv, YV12_BUFFER_CONFIG *recon_buffer, int *best_motion_err, int recon_yoffset ) void vp8_first_pass_motion_search(VP8_COMP *cpi, MACROBLOCK *x, MV *ref_mv, MV *best_mv, YV12_BUFFER_CONFIG *recon_buffer, int *best_motion_err, int recon_yoffset )
{ {
MACROBLOCKD *const xd = & x->e_mbd; MACROBLOCKD *const xd = & x->e_mbd;
@ -548,7 +573,6 @@ void vp8_first_pass(VP8_COMP *cpi)
int sum_in_vectors = 0; int sum_in_vectors = 0;
MV best_ref_mv = {0, 0};
MV zero_ref_mv = {0, 0}; MV zero_ref_mv = {0, 0};
unsigned char *fp_motion_map_ptr = cpi->fp_motion_map; unsigned char *fp_motion_map_ptr = cpi->fp_motion_map;
@ -586,13 +610,20 @@ void vp8_first_pass(VP8_COMP *cpi)
// for each macroblock row in image // for each macroblock row in image
for (mb_row = 0; mb_row < cm->mb_rows; mb_row++) for (mb_row = 0; mb_row < cm->mb_rows; mb_row++)
{ {
MV best_ref_mv = {0, 0}; int_mv best_ref_mv;
best_ref_mv.as_int = 0;
// reset above block coeffs // reset above block coeffs
xd->up_available = (mb_row != 0); xd->up_available = (mb_row != 0);
recon_yoffset = (mb_row * recon_y_stride * 16); recon_yoffset = (mb_row * recon_y_stride * 16);
recon_uvoffset = (mb_row * recon_uv_stride * 8); recon_uvoffset = (mb_row * recon_uv_stride * 8);
// Set up limit values for motion vectors to prevent them extending outside the UMV borders
x->mv_row_min = -((mb_row * 16) + (VP8BORDERINPIXELS - 16));
x->mv_row_max = ((cm->mb_rows - 1 - mb_row) * 16) + (VP8BORDERINPIXELS - 16);
// for each macroblock col in image // for each macroblock col in image
for (mb_col = 0; mb_col < cm->mb_cols; mb_col++) for (mb_col = 0; mb_col < cm->mb_cols; mb_col++)
{ {
@ -625,8 +656,6 @@ void vp8_first_pass(VP8_COMP *cpi)
// Set up limit values for motion vectors to prevent them extending outside the UMV borders // Set up limit values for motion vectors to prevent them extending outside the UMV borders
x->mv_col_min = -((mb_col * 16) + (VP8BORDERINPIXELS - 16)); x->mv_col_min = -((mb_col * 16) + (VP8BORDERINPIXELS - 16));
x->mv_col_max = ((cm->mb_cols - 1 - mb_col) * 16) + (VP8BORDERINPIXELS - 16); x->mv_col_max = ((cm->mb_cols - 1 - mb_col) * 16) + (VP8BORDERINPIXELS - 16);
x->mv_row_min = -((mb_row * 16) + (VP8BORDERINPIXELS - 16));
x->mv_row_max = ((cm->mb_rows - 1 - mb_row) * 16) + (VP8BORDERINPIXELS - 16);
// Other than for the first frame do a motion search // Other than for the first frame do a motion search
if (cm->current_video_frame > 0) if (cm->current_video_frame > 0)
@ -647,12 +676,12 @@ void vp8_first_pass(VP8_COMP *cpi)
// Test last reference frame using the previous best mv as the // Test last reference frame using the previous best mv as the
// starting point (best reference) for the search // starting point (best reference) for the search
vp8_first_pass_motion_search(cpi, x, &best_ref_mv, vp8_first_pass_motion_search(cpi, x, &best_ref_mv.as_mv,
&d->bmi.mv.as_mv, lst_yv12, &d->bmi.mv.as_mv, lst_yv12,
&motion_error, recon_yoffset); &motion_error, recon_yoffset);
// If the current best reference mv is not centred on 0,0 then do a 0,0 based search as well // If the current best reference mv is not centred on 0,0 then do a 0,0 based search as well
if ((best_ref_mv.col != 0) || (best_ref_mv.row != 0)) if (best_ref_mv.as_int)
{ {
tmp_err = INT_MAX; tmp_err = INT_MAX;
vp8_first_pass_motion_search(cpi, x, &zero_ref_mv, &tmp_mv, vp8_first_pass_motion_search(cpi, x, &zero_ref_mv, &tmp_mv,
@ -664,7 +693,6 @@ void vp8_first_pass(VP8_COMP *cpi)
d->bmi.mv.as_mv.row = tmp_mv.row; d->bmi.mv.as_mv.row = tmp_mv.row;
d->bmi.mv.as_mv.col = tmp_mv.col; d->bmi.mv.as_mv.col = tmp_mv.col;
} }
} }
// Experimental search in a second reference frame ((0,0) based only) // Experimental search in a second reference frame ((0,0) based only)
@ -693,6 +721,9 @@ void vp8_first_pass(VP8_COMP *cpi)
xd->pre.v_buffer = lst_yv12->v_buffer + recon_uvoffset; xd->pre.v_buffer = lst_yv12->v_buffer + recon_uvoffset;
} }
/* Intra assumed best */
best_ref_mv.as_int = 0;
if (motion_error <= this_error) if (motion_error <= this_error)
{ {
d->bmi.mv.as_mv.row <<= 3; d->bmi.mv.as_mv.row <<= 3;
@ -708,13 +739,10 @@ void vp8_first_pass(VP8_COMP *cpi)
sum_mvcs += d->bmi.mv.as_mv.col * d->bmi.mv.as_mv.col; sum_mvcs += d->bmi.mv.as_mv.col * d->bmi.mv.as_mv.col;
intercount++; intercount++;
best_ref_mv.row = d->bmi.mv.as_mv.row; best_ref_mv.as_int = d->bmi.mv.as_int;
best_ref_mv.col = d->bmi.mv.as_mv.col;
//best_ref_mv.row = 0;
//best_ref_mv.col = 0;
// Was the vector non-zero // Was the vector non-zero
if (d->bmi.mv.as_mv.row || d->bmi.mv.as_mv.col) if (d->bmi.mv.as_int)
{ {
mvcount++; mvcount++;
@ -770,12 +798,6 @@ void vp8_first_pass(VP8_COMP *cpi)
*fp_motion_map_ptr = 1; *fp_motion_map_ptr = 1;
} }
} }
else
{
// Intra was best
best_ref_mv.row = 0;
best_ref_mv.col = 0;
}
} }
coded_error += this_error; coded_error += this_error;
@ -813,6 +835,7 @@ void vp8_first_pass(VP8_COMP *cpi)
fps.coded_error = coded_error >> 8; fps.coded_error = coded_error >> 8;
weight = vp8_simple_weight(cpi->Source); weight = vp8_simple_weight(cpi->Source);
if (weight < 0.1) if (weight < 0.1)
weight = 0.1; weight = 0.1;