Decide the partitioning threshold from the variance histogram

Before encoding a frame, calculate and store each 16x16 block's
variance of source difference between last and current frame.
Find partitioning threshold T for the frame from its variance
histogram, and then use T to make partition decisions.

Comparing with fixed 16x16 partitioning, rtc set test showed an
overall psnr gain of 3.242%, and ssim gain of 3.751%. The best
psnr gain is 8.653%.

The overall encoding speed didn't change much. It got faster for
some clips(for example, 12% speedup for vidyo1), and a little
slower for others.

Also, a minor modification was made in datarate unit test.

Change-Id: Ie290743aa3814e83607b93831b667a2a49d0932c
This commit is contained in:
Yunqing Wang 2014-05-01 15:14:39 -07:00
parent a93c506034
commit 9d41313e4b
8 changed files with 145 additions and 70 deletions

View File

@ -576,7 +576,7 @@ TEST_P(DatarateTestVP9Large, BasicRateTargeting3TemporalLayersFrameDropping) {
// Expect some frame drops in this test: for this 200 frames test,
// expect at least 10% and not more than 60% drops.
ASSERT_GE(num_drops_, 20);
ASSERT_LE(num_drops_, 120);
ASSERT_LE(num_drops_, 130);
}
}

View File

@ -20,6 +20,12 @@
extern "C" {
#endif
typedef struct {
unsigned int sse;
int sum;
unsigned int var;
} diff;
struct macroblock_plane {
DECLARE_ALIGNED(16, int16_t, src_diff[64 * 64]);
int16_t *qcoeff;

View File

@ -70,12 +70,6 @@ static const uint8_t VP9_VAR_OFFS[64] = {
128, 128, 128, 128, 128, 128, 128, 128
};
typedef struct {
unsigned int sse;
int sum;
unsigned int var;
} diff;
static unsigned int get_sby_perpixel_variance(VP9_COMP *cpi,
const struct buf_2d *ref,
BLOCK_SIZE bs) {
@ -1140,7 +1134,6 @@ static void constrain_copy_partitioning(VP9_COMP *const cpi,
}
}
const struct {
int row;
int col;
@ -1173,34 +1166,26 @@ static void set_source_var_based_partition(VP9_COMP *cpi,
// In-image SB64
if ((col8x8_remaining >= MI_BLOCK_SIZE) &&
(row8x8_remaining >= MI_BLOCK_SIZE)) {
const int src_stride = x->plane[0].src.stride;
const int pre_stride = cpi->Last_Source->y_stride;
const uint8_t *src = x->plane[0].src.buf;
const int pre_offset = (mi_row * MI_SIZE) * pre_stride +
(mi_col * MI_SIZE);
const uint8_t *pre_src = cpi->Last_Source->y_buffer + pre_offset;
const unsigned int thr_32x32 = cpi->sf.source_var_thresh;
const unsigned int thr_64x64 = thr_32x32 << 1;
int i, j;
int index;
diff d32[4];
int use16x16 = 0;
const int offset = (mi_row >> 1) * cm->mb_cols + (mi_col >> 1);
int is_larger_better = 0;
int use32x32 = 0;
int thr = cpi->source_var_thresh;
vpx_memset(d32, 0, 4 * sizeof(diff));
for (i = 0; i < 4; i++) {
diff d16[4];
diff *d16[4];
for (j = 0; j < 4; j++) {
int b_mi_row = coord_lookup[i * 4 + j].row;
int b_mi_col = coord_lookup[i * 4 + j].col;
int b_offset = b_mi_row * MI_SIZE * src_stride +
b_mi_col * MI_SIZE;
int boffset = b_mi_row / 2 * cm->mb_cols +
b_mi_col / 2;
vp9_get16x16var(src + b_offset, src_stride,
pre_src + b_offset, pre_stride,
&d16[j].sse, &d16[j].sum);
d16[j].var = d16[j].sse -
(((uint32_t)d16[j].sum * d16[j].sum) >> 8);
d16[j] = cpi->source_diff_var + offset + boffset;
index = b_mi_row * mis + b_mi_col;
mi_8x8[index] = mi_upper_left + index;
@ -1210,14 +1195,16 @@ static void set_source_var_based_partition(VP9_COMP *cpi,
// size to further improve quality.
}
if (d16[0].var < thr_32x32 && d16[1].var < thr_32x32 &&
d16[2].var < thr_32x32 && d16[3].var < thr_32x32) {
d32[i].sse = d16[0].sse;
d32[i].sum = d16[0].sum;
is_larger_better = (d16[0]->var < thr) && (d16[1]->var < thr) &&
(d16[2]->var < thr) && (d16[3]->var < thr);
for (j = 1; j < 4; j++) {
d32[i].sse += d16[j].sse;
d32[i].sum += d16[j].sum;
// Use 32x32 partition
if (is_larger_better) {
use32x32 += 1;
for (j = 0; j < 4; j++) {
d32[i].sse += d16[j]->sse;
d32[i].sum += d16[j]->sum;
}
d32[i].var = d32[i].sse - (((int64_t)d32[i].sum * d32[i].sum) >> 10);
@ -1225,18 +1212,16 @@ static void set_source_var_based_partition(VP9_COMP *cpi,
index = coord_lookup[i*4].row * mis + coord_lookup[i*4].col;
mi_8x8[index] = mi_upper_left + index;
mi_8x8[index]->mbmi.sb_type = BLOCK_32X32;
if (!((cm->current_video_frame - 1) %
cpi->sf.search_type_check_frequency))
cpi->use_large_partition_rate += 1;
} else {
use16x16 = 1;
}
}
if (!use16x16) {
if (d32[0].var < thr_64x64 && d32[1].var < thr_64x64 &&
d32[2].var < thr_64x64 && d32[3].var < thr_64x64) {
if (use32x32 == 4) {
thr <<= 1;
is_larger_better = (d32[0].var < thr) && (d32[1].var < thr) &&
(d32[2].var < thr) && (d32[3].var < thr);
// Use 64x64 partition
if (is_larger_better) {
mi_8x8[0] = mi_upper_left;
mi_8x8[0]->mbmi.sb_type = BLOCK_64X64;
}
@ -2938,6 +2923,93 @@ static void encode_nonrd_sb_row(VP9_COMP *cpi, const TileInfo *const tile,
}
// end RTC play code
static int set_var_thresh_from_histogram(VP9_COMP *cpi) {
SPEED_FEATURES *const sf = &cpi->sf;
VP9_COMMON *const cm = &cpi->common;
const uint8_t *src = cpi->Source->y_buffer;
const uint8_t *last_src = cpi->Last_Source->y_buffer;
const int src_stride = cpi->Source->y_stride;
const int last_stride = cpi->Last_Source->y_stride;
// Pick cutoff threshold
const int cutoff = (MIN(cm->width, cm->height) >= 720) ?
(cm->MBs * VAR_HIST_LARGE_CUT_OFF / 100) :
(cm->MBs * VAR_HIST_SMALL_CUT_OFF / 100);
DECLARE_ALIGNED_ARRAY(16, int, hist, VAR_HIST_BINS);
diff *var16 = cpi->source_diff_var;
int sum = 0;
int i, j;
vpx_memset(hist, 0, VAR_HIST_BINS * sizeof(hist[0]));
for (i = 0; i < cm->mb_rows; i++) {
for (j = 0; j < cm->mb_cols; j++) {
vp9_get16x16var(src, src_stride, last_src, last_stride,
&var16->sse, &var16->sum);
var16->var = var16->sse -
(((uint32_t)var16->sum * var16->sum) >> 8);
if (var16->var >= VAR_HIST_MAX_BG_VAR)
hist[VAR_HIST_BINS - 1]++;
else
hist[var16->var / VAR_HIST_FACTOR]++;
src += 16;
last_src += 16;
var16++;
}
src = src - cm->mb_cols * 16 + 16 * src_stride;
last_src = last_src - cm->mb_cols * 16 + 16 * last_stride;
}
cpi->source_var_thresh = 0;
if (hist[VAR_HIST_BINS - 1] < cutoff) {
for (i = 0; i < VAR_HIST_BINS - 1; i++) {
sum += hist[i];
if (sum > cutoff) {
cpi->source_var_thresh = (i + 1) * VAR_HIST_FACTOR;
return 0;
}
}
}
return sf->search_type_check_frequency;
}
static void source_var_based_partition_search_method(VP9_COMP *cpi) {
VP9_COMMON *const cm = &cpi->common;
SPEED_FEATURES *const sf = &cpi->sf;
if (cm->frame_type == KEY_FRAME) {
// For key frame, use SEARCH_PARTITION.
sf->partition_search_type = SEARCH_PARTITION;
} else if (cm->intra_only) {
sf->partition_search_type = FIXED_PARTITION;
} else {
if (cm->last_width != cm->width || cm->last_height != cm->height) {
if (cpi->source_diff_var)
vpx_free(cpi->source_diff_var);
CHECK_MEM_ERROR(cm, cpi->source_diff_var,
vpx_calloc(cm->MBs, sizeof(diff)));
}
if (!cpi->frames_till_next_var_check)
cpi->frames_till_next_var_check = set_var_thresh_from_histogram(cpi);
if (cpi->frames_till_next_var_check > 0) {
sf->partition_search_type = FIXED_PARTITION;
cpi->frames_till_next_var_check--;
}
}
}
static int get_skip_encode_frame(const VP9_COMMON *cm) {
unsigned int intra_count = 0, inter_count = 0;
int j;
@ -3037,28 +3109,8 @@ static void encode_frame_internal(VP9_COMP *cpi) {
}
vp9_zero(x->zcoeff_blk);
if (sf->partition_search_type == SOURCE_VAR_BASED_PARTITION &&
cm->current_video_frame > 0) {
int check_freq = sf->search_type_check_frequency;
if ((cm->current_video_frame - 1) % check_freq == 0) {
cpi->use_large_partition_rate = 0;
}
if ((cm->current_video_frame - 1) % check_freq == 1) {
const int mbs_in_b32x32 = 1 << ((b_width_log2_lookup[BLOCK_32X32] -
b_width_log2_lookup[BLOCK_16X16]) +
(b_height_log2_lookup[BLOCK_32X32] -
b_height_log2_lookup[BLOCK_16X16]));
cpi->use_large_partition_rate = cpi->use_large_partition_rate * 100 *
mbs_in_b32x32 / cm->MBs;
}
if ((cm->current_video_frame - 1) % check_freq >= 1) {
if (cpi->use_large_partition_rate < 15)
sf->partition_search_type = FIXED_PARTITION;
}
}
if (sf->partition_search_type == SOURCE_VAR_BASED_PARTITION)
source_var_based_partition_search_method(cpi);
}
{

View File

@ -20,6 +20,13 @@ struct macroblock;
struct yv12_buffer_config;
struct VP9_COMP;
// Constants used in SOURCE_VAR_BASED_PARTITION
#define VAR_HIST_MAX_BG_VAR 1000
#define VAR_HIST_FACTOR 10
#define VAR_HIST_BINS (VAR_HIST_MAX_BG_VAR / VAR_HIST_FACTOR + 1)
#define VAR_HIST_LARGE_CUT_OFF 75
#define VAR_HIST_SMALL_CUT_OFF 45
void vp9_setup_src_planes(struct macroblock *x,
const struct yv12_buffer_config *src,
int mi_row, int mi_col);

View File

@ -194,6 +194,11 @@ static void dealloc_compressor_data(VP9_COMP *cpi) {
lc->rc_twopass_stats_in.buf = NULL;
lc->rc_twopass_stats_in.sz = 0;
}
if (cpi->source_diff_var != NULL) {
vpx_free(cpi->source_diff_var);
cpi->source_diff_var = NULL;
}
}
static void save_coding_context(VP9_COMP *cpi) {
@ -907,6 +912,12 @@ VP9_COMP *vp9_create_compressor(VP9EncoderConfig *oxcf) {
set_speed_features(cpi);
// Allocate memory to store variances for a frame.
CHECK_MEM_ERROR(cm, cpi->source_diff_var,
vpx_calloc(cm->MBs, sizeof(diff)));
cpi->source_var_thresh = 0;
cpi->frames_till_next_var_check = 0;
// Default rd threshold factors for mode selection
for (i = 0; i < BLOCK_SIZES; ++i) {
for (j = 0; j < MAX_MODES; ++j)

View File

@ -401,7 +401,11 @@ typedef struct VP9_COMP {
SVC svc;
int use_large_partition_rate;
// Store frame variance info in SOURCE_VAR_BASED_PARTITION search type.
diff *source_diff_var;
// The threshold used in SOURCE_VAR_BASED_PARTITION search type.
unsigned int source_var_thresh;
int frames_till_next_var_check;
int frame_flags;

View File

@ -270,7 +270,6 @@ static void set_rt_speed_feature(VP9_COMP *cpi, SPEED_FEATURES *sf,
// Adaptively switch between SOURCE_VAR_BASED_PARTITION and FIXED_PARTITION.
sf->partition_search_type = SOURCE_VAR_BASED_PARTITION;
sf->search_type_check_frequency = 50;
sf->source_var_thresh = 360;
sf->tx_size_search_method = (cm->frame_type == KEY_FRAME) ?
USE_LARGESTALL : USE_TX_8X8;
@ -350,7 +349,6 @@ void vp9_set_speed_features(VP9_COMP *cpi) {
// to FIXED_PARTITION.
sf->always_this_block_size = BLOCK_16X16;
sf->search_type_check_frequency = 50;
sf->source_var_thresh = 100;
// Recode loop tolerence %.
sf->recode_tolerance = 25;

View File

@ -353,9 +353,6 @@ typedef struct SPEED_FEATURES {
// FIXED_PARTITION search type should be used.
int search_type_check_frequency;
// The threshold used in SOURCE_VAR_BASED_PARTITION search type.
unsigned int source_var_thresh;
// When partition is pre-set, the inter prediction result from pick_inter_mode
// can be reused in final block encoding process. It is enabled only for real-
// time mode speed 6.