Enable mode search threshold update in non-RD coding mode

Adaptively adjust the mode thresholds after each mode search round
to skip checking modes that are less likely to be selected. Local tests
indicate a 5% - 10% speed-up at speed settings -5 and -6. The average
change in coding performance is -1.055% (a loss).

speed -5
vidyo1 720p 1000 kbps
16533 b/f, 40.851 dB, 12607 ms -> 16556 b/f, 40.796 dB, 11831 ms

nik 720p 1000 kbps
33229 b/f, 39.127 dB, 11468 ms -> 33235 b/f, 39.131 dB, 10919 ms

speed -6
vidyo1 720p 1000 kbps
16549 b/f, 40.268 dB, 10138 ms -> 16538 b/f, 40.212 dB, 8456 ms

nik 720p 1000 kbps
33271 b/f, 38.433 dB,  7886 ms -> 33279 b/f, 38.416 dB, 7843 ms

Change-Id: I2c2963f1ce4ed9c1cf233b5b2c880b682e1c1e8b
This commit is contained in:
Jingning Han 2014-10-28 14:50:10 -07:00
parent 50e5c30536
commit 9349a28e80
5 changed files with 49 additions and 43 deletions

View File

@ -450,7 +450,8 @@ static void estimate_block_intra(int plane, int block, BLOCK_SIZE plane_bsize,
args->dist += dist;
}
static const THR_MODES mode_idx[MAX_REF_FRAMES - 1][INTER_MODES] = {
static const THR_MODES mode_idx[MAX_REF_FRAMES][4] = {
{THR_DC, THR_H_PRED, THR_V_PRED},
{THR_NEARESTMV, THR_NEARMV, THR_ZEROMV, THR_NEWMV},
{THR_NEARESTG, THR_NEARG, THR_ZEROG, THR_NEWG},
{THR_NEARESTA, THR_NEARA, THR_ZEROA, THR_NEWA},
@ -558,7 +559,7 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
EIGHTTAP : cm->interp_filter;
mbmi->segment_id = segment_id;
for (ref_frame = LAST_FRAME; ref_frame <= ALTREF_FRAME; ++ref_frame) {
for (ref_frame = LAST_FRAME; ref_frame <= GOLDEN_FRAME; ++ref_frame) {
PREDICTION_MODE this_mode;
x->pred_mv_sad[ref_frame] = INT_MAX;
frame_mv[NEWMV][ref_frame].as_int = INVALID_MV;
@ -610,8 +611,7 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
continue;
mode_rd_thresh =
rd_threshes[mode_idx[ref_frame -
LAST_FRAME][INTER_OFFSET(this_mode)]];
rd_threshes[mode_idx[ref_frame][INTER_OFFSET(this_mode)]];
if (rd_less_than_thresh(best_rdc.rdcost, mode_rd_thresh,
rd_thresh_freq_fact[this_mode]))
continue;
@ -757,10 +757,9 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
if (x->skip)
break;
}
// If the current reference frame is valid and we found a usable mode,
// we are done.
if (best_rdc.rdcost < INT64_MAX && ref_frame == GOLDEN_FRAME)
break;
// Check that a prediction mode has been selected.
assert(best_rdc.rdcost < INT64_MAX);
}
// If best prediction is not in dst buf, then copy the prediction block from
@ -836,5 +835,12 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
pd->dst = orig_dst;
}
if (is_inter_block(mbmi))
vp9_update_rd_thresh_fact(cpi, tile_data, bsize,
mode_idx[ref_frame][INTER_OFFSET(mbmi->mode)]);
else
vp9_update_rd_thresh_fact(cpi, tile_data, bsize,
mode_idx[ref_frame][mbmi->mode]);
*rd_cost = best_rdc;
}

View File

@ -611,6 +611,29 @@ void vp9_set_rd_speed_thresholds_sub8x8(VP9_COMP *cpi) {
rd->thresh_mult_sub8x8[i] = INT_MAX;
}
// TODO(jingning) Refactor this function. Use targeted smaller struct as inputs.
//
// Updates tile_data->thresh_freq_fact after a mode decision. Nothing happens
// unless cpi->sf.adaptive_rd_thresh is positive. For each block size from one
// step below bsize (clamped at BLOCK_4X4) up to two steps above it (clamped
// at BLOCK_64X64):
//   - the factor of best_mode_index is lowered by 1/16 of its current value;
//   - the factor of every other mode is raised by RD_THRESH_INC, saturating
//     at adaptive_rd_thresh * RD_THRESH_MAX_FACT.
// Block sizes below BLOCK_8X8 only touch the first MAX_REFS entries; all
// other sizes touch MAX_MODES entries.
void vp9_update_rd_thresh_fact(VP9_COMP *cpi, TileDataEnc *tile_data,
                               int bsize, int best_mode_index) {
  // The neighbouring block-size window does not depend on the mode, so it is
  // computed once up front.
  const BLOCK_SIZE first_bs = MAX(bsize - 1, BLOCK_4X4);
  const BLOCK_SIZE last_bs = MIN(bsize + 2, BLOCK_64X64);
  const int num_modes = (bsize < BLOCK_8X8) ? MAX_REFS : MAX_MODES;
  int idx;

  if (cpi->sf.adaptive_rd_thresh <= 0)
    return;

  for (idx = 0; idx < num_modes; ++idx) {
    BLOCK_SIZE cur_bs;
    for (cur_bs = first_bs; cur_bs <= last_bs; ++cur_bs) {
      int *const factor = &tile_data->thresh_freq_fact[cur_bs][idx];
      if (idx != best_mode_index) {
        // Not selected this round: raise the factor, capped at the ceiling.
        *factor = MIN(*factor + RD_THRESH_INC,
                      cpi->sf.adaptive_rd_thresh * RD_THRESH_MAX_FACT);
      } else {
        // Selected mode: decay its factor by a sixteenth.
        *factor -= (*factor >> 4);
      }
    }
  }
}
int vp9_get_intra_cost_penalty(int qindex, int qdelta,
vpx_bit_depth_t bit_depth) {
const int q = vp9_dc_quant(qindex, qdelta, bit_depth);

View File

@ -36,6 +36,9 @@ extern "C" {
#define MAX_MODES 30
#define MAX_REFS 6
#define RD_THRESH_MAX_FACT 64
#define RD_THRESH_INC 1
// This enumerator type needs to be kept aligned with the mode order in
// const MODE_DEFINITION vp9_mode_order[MAX_MODES] used in the rd code.
typedef enum {
@ -129,6 +132,7 @@ void vp9_rd_cost_reset(RD_COST *rd_cost);
void vp9_rd_cost_init(RD_COST *rd_cost);
struct TileInfo;
struct TileDataEnc;
struct VP9_COMP;
struct macroblock;
@ -158,6 +162,10 @@ void vp9_set_rd_speed_thresholds(struct VP9_COMP *cpi);
void vp9_set_rd_speed_thresholds_sub8x8(struct VP9_COMP *cpi);
void vp9_update_rd_thresh_fact(struct VP9_COMP *cpi,
struct TileDataEnc *tile_data,
int bsize, int best_mode_index);
static INLINE int rd_less_than_thresh(int64_t best_rd, int thresh,
int thresh_fact) {
return best_rd < ((int64_t)thresh * thresh_fact >> 5) || thresh == INT_MAX;

View File

@ -38,9 +38,6 @@
#include "vp9/encoder/vp9_rdopt.h"
#include "vp9/encoder/vp9_variance.h"
#define RD_THRESH_MAX_FACT 64
#define RD_THRESH_INC 1
#define LAST_FRAME_MODE_MASK ((1 << GOLDEN_FRAME) | (1 << ALTREF_FRAME) | \
(1 << INTRA_FRAME))
#define GOLDEN_FRAME_MODE_MASK ((1 << LAST_FRAME) | (1 << ALTREF_FRAME) | \
@ -2765,29 +2762,6 @@ void vp9_rd_pick_intra_mode_sb(VP9_COMP *cpi, MACROBLOCK *x,
rd_cost->rdcost = RDCOST(x->rdmult, x->rddiv, rd_cost->rate, rd_cost->dist);
}
// TODO(jingning) Refactor this function. Use targeted smaller struct as inputs.
//
// Adjusts the entries of tile_data->thresh_freq_fact once a best mode is
// known. It is a no-op when cpi->sf.adaptive_rd_thresh is not positive.
// The block sizes visited run from MAX(bsize - 1, BLOCK_4X4) through
// MIN(bsize + 2, BLOCK_64X64). Within each of them, the entry at
// best_mode_index shrinks by (value >> 4) and every other entry grows by
// RD_THRESH_INC, limited to adaptive_rd_thresh * RD_THRESH_MAX_FACT.
static void update_rd_thresh_fact(VP9_COMP *cpi, TileDataEnc *tile_data,
                                  int bsize, int best_mode_index) {
  const BLOCK_SIZE low = MAX(bsize - 1, BLOCK_4X4);
  const BLOCK_SIZE high = MIN(bsize + 2, BLOCK_64X64);
  // Number of table entries visited per block size.
  const int limit = bsize < BLOCK_8X8 ? MAX_REFS : MAX_MODES;
  BLOCK_SIZE size;

  if (!(cpi->sf.adaptive_rd_thresh > 0))
    return;

  // Every [size][m] cell is updated exactly once and independently of the
  // others, so the block-size loop can be the outer one.
  for (size = low; size <= high; ++size) {
    int m;
    for (m = 0; m < limit; ++m) {
      int *const entry = &tile_data->thresh_freq_fact[size][m];
      if (m == best_mode_index)
        *entry -= (*entry >> 4);
      else
        *entry = MIN(*entry + RD_THRESH_INC,
                     cpi->sf.adaptive_rd_thresh * RD_THRESH_MAX_FACT);
    }
  }
}
void vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi,
TileDataEnc *tile_data,
MACROBLOCK *x,
@ -3423,7 +3397,7 @@ void vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi,
!is_inter_block(&best_mbmode));
if (!cpi->rc.is_src_frame_alt_ref)
update_rd_thresh_fact(cpi, tile_data, bsize, best_mode_index);
vp9_update_rd_thresh_fact(cpi, tile_data, bsize, best_mode_index);
// macroblock modes
*mbmi = best_mbmode;
@ -3578,7 +3552,7 @@ void vp9_rd_pick_inter_mode_sb_seg_skip(VP9_COMP *cpi,
assert((cm->interp_filter == SWITCHABLE) ||
(cm->interp_filter == mbmi->interp_filter));
update_rd_thresh_fact(cpi, tile_data, bsize, THR_ZEROMV);
vp9_update_rd_thresh_fact(cpi, tile_data, bsize, THR_ZEROMV);
vp9_zero(best_pred_diff);
vp9_zero(best_filter_diff);
@ -4153,7 +4127,7 @@ void vp9_rd_pick_inter_mode_sub8x8(VP9_COMP *cpi,
(cm->interp_filter == best_mbmode.interp_filter) ||
!is_inter_block(&best_mbmode));
update_rd_thresh_fact(cpi, tile_data, bsize, best_ref_index);
vp9_update_rd_thresh_fact(cpi, tile_data, bsize, best_ref_index);
// macroblock modes
*mbmi = best_mbmode;

View File

@ -269,7 +269,7 @@ static void set_rt_speed_feature(VP9_COMP *cpi, SPEED_FEATURES *sf,
sf->inter_mode_mask[BLOCK_32X64] = INTER_NEAREST_NEW_ZERO;
sf->inter_mode_mask[BLOCK_64X32] = INTER_NEAREST_NEW_ZERO;
sf->inter_mode_mask[BLOCK_64X64] = INTER_NEAREST_NEW_ZERO;
sf->adaptive_rd_thresh = 2;
// This feature is only enabled when partition search is disabled.
sf->reuse_inter_pred_sby = 1;
@ -292,12 +292,7 @@ static void set_rt_speed_feature(VP9_COMP *cpi, SPEED_FEATURES *sf,
sf->partition_search_type = VAR_BASED_PARTITION;
sf->search_type_check_frequency = 50;
sf->mv.search_method = NSTEP;
sf->tx_size_search_method = is_keyframe ? USE_LARGESTALL : USE_TX_8X8;
// Increase mode checking threshold for NEWMV.
sf->elevate_newmv_thresh = 1000;
sf->mv.reduce_first_step_size = 1;
}