Simplify bilateral filter search for speed

Adds an internal buffer in the encoder to store the deblocked
result to help speed up the search for the best bilateral filter.

Very small change in coding performance, but the filter search is a lot faster:
derflr: +0.518%

Change-Id: I5d37e016088e559c16317789cfb1c2f49334b2b9
This commit is contained in:
Debargha Mukherjee 2015-04-15 09:43:36 -07:00
parent 294159d41e
commit 017baf9f4b
8 changed files with 76 additions and 50 deletions

View File

@ -246,11 +246,11 @@ int vp9_loop_bilateral_used(int level, int kf) {
}
void vp9_loop_bilateral_init(loop_filter_info_n *lfi, int level, int kf) {
if (level != lfi->bilateral_level_set ||
kf != lfi->bilateral_kf_set) {
lfi->bilateral_used = vp9_loop_bilateral_used(level, kf);
if (lfi->bilateral_used) {
const bilateral_params_t param = vp9_bilateral_level_to_params(level, kf);
const bilateral_params_t param = vp9_bilateral_level_to_params(level, kf);
lfi->bilateral_used = vp9_loop_bilateral_used(level, kf);
if (lfi->bilateral_used) {
if (param.sigma_x != lfi->bilateral_sigma_x_set ||
param.sigma_r != lfi->bilateral_sigma_r_set) {
const int sigma_x = param.sigma_x;
const int sigma_r = param.sigma_r;
const double sigma_r_d = (double)sigma_r / BILATERAL_PRECISION;
@ -267,9 +267,9 @@ void vp9_loop_bilateral_init(loop_filter_info_n *lfi, int level, int kf) {
wx_lut_[y * BILATERAL_WIN + x] =
exp(-(x * x + y * y) / (2 * sigma_x_d * sigma_x_d));
}
lfi->bilateral_sigma_x_set = sigma_x;
lfi->bilateral_sigma_r_set = sigma_r;
}
lfi->bilateral_level_set = level;
lfi->bilateral_kf_set = kf;
}
}
@ -1855,15 +1855,14 @@ void vp9_loop_filter_frame(YV12_BUFFER_CONFIG *frame,
}
#if CONFIG_LOOP_POSTFILTER
void vp9_loop_filter_gen_frame(YV12_BUFFER_CONFIG *frame,
VP9_COMMON *cm, MACROBLOCKD *xd,
int frame_filter_level,
int bilateral_level,
int y_only, int partial_frame) {
void vp9_loop_bilateral_frame(YV12_BUFFER_CONFIG *frame,
VP9_COMMON *cm,
int bilateral_level,
int y_only, int partial_frame) {
int start_mi_row, end_mi_row, mi_rows_to_filter;
const int loop_bilateral_used = vp9_loop_bilateral_used(
bilateral_level, cm->frame_type == KEY_FRAME);
if (!frame_filter_level && !loop_bilateral_used)
if (!loop_bilateral_used)
return;
start_mi_row = 0;
mi_rows_to_filter = cm->mi_rows;
@ -1873,12 +1872,6 @@ void vp9_loop_filter_gen_frame(YV12_BUFFER_CONFIG *frame,
mi_rows_to_filter = MAX(cm->mi_rows / 8, 8);
}
end_mi_row = start_mi_row + mi_rows_to_filter;
if (frame_filter_level) {
vp9_loop_filter_frame_init(cm, frame_filter_level);
vp9_loop_filter_rows(frame, cm, xd->plane,
start_mi_row, end_mi_row,
y_only);
}
if (loop_bilateral_used) {
vp9_loop_bilateral_init(&cm->lf_info, bilateral_level,
cm->frame_type == KEY_FRAME);

View File

@ -50,7 +50,8 @@ typedef struct bilateral_params {
static bilateral_params_t
bilateral_level_to_params_arr[BILATERAL_LEVELS + 1] = {
// Values are rounded to 1/8 th precision
{4, 16}, // 0 - default
{0, 0}, // 0 - default
{4, 16},
{5, 16},
{6, 16},
{7, 16},
@ -58,13 +59,13 @@ static bilateral_params_t
{12, 20},
{16, 20},
{20, 20},
{24, 24}
};
static bilateral_params_t
bilateral_level_to_params_arr_kf[BILATERAL_LEVELS_KF + 1] = {
// Values are rounded to 1/8 th precision
{4, 16}, // 0 - default
{0, 0}, // 0 - default
{4, 16},
{5, 16},
{6, 16},
{7, 16},
@ -80,7 +81,6 @@ static bilateral_params_t
{28, 32},
{32, 24},
{32, 28},
{32, 32},
};
int vp9_bilateral_level_bits(const struct VP9Common *const cm);
@ -129,8 +129,8 @@ typedef struct {
#if CONFIG_LOOP_POSTFILTER
double wx_lut[BILATERAL_WIN * BILATERAL_WIN];
double wr_lut[512];
int bilateral_level_set;
int bilateral_kf_set;
int bilateral_sigma_x_set;
int bilateral_sigma_r_set;
int bilateral_used;
#endif
} loop_filter_info_n;
@ -190,18 +190,22 @@ void vp9_loop_filter_rows(YV12_BUFFER_CONFIG *frame_buffer,
struct macroblockd_plane planes[MAX_MB_PLANE],
int start, int stop, int y_only);
#if CONFIG_LOOP_POSTFILTER
void vp9_loop_filter_gen_frame(YV12_BUFFER_CONFIG *frame,
struct VP9Common *cm,
struct macroblockd *mbd,
int frame_filter_level,
int bilateral_level,
int y_only, int partial_frame);
void vp9_loop_bilateral_frame(YV12_BUFFER_CONFIG *frame,
struct VP9Common *cm,
int bilateral_level,
int y_only, int partial_frame);
void vp9_loop_filter_bilateral_frame(YV12_BUFFER_CONFIG *frame,
struct VP9Common *cm,
struct macroblockd *mbd,
int frame_filter_level,
int bilateral_level,
int y_only, int partial_frame);
void vp9_loop_bilateral_init(loop_filter_info_n *lfi, int T, int kf);
void vp9_loop_bilateral_rows(YV12_BUFFER_CONFIG *frame,
struct VP9Common *cm,
int start_mi_row, int end_mi_row,
int y_only);
#endif
#endif // CONFIG_LOOP_POSTFILTER
typedef struct LoopFilterWorkerData {

View File

@ -1584,7 +1584,7 @@ static void setup_loopfilter(VP9_COMMON *cm,
lf->bilateral_level += vp9_rb_read_literal(
rb, vp9_bilateral_level_bits(cm));
}
#endif
#endif // CONFIG_LOOP_POSTFILTER
}
static int read_delta_q(struct vp9_read_bit_buffer *rb, int *delta_q) {

View File

@ -1531,7 +1531,7 @@ static void encode_loopfilter(VP9_COMMON *cm,
if (lf->bilateral_level > 0)
vp9_wb_write_literal(wb, lf->bilateral_level - 1,
vp9_bilateral_level_bits(cm));
#endif
#endif // CONFIG_LOOP_POSTFILTER
}
static void write_delta_q(struct vp9_write_bit_buffer *wb, int delta_q) {

View File

@ -205,6 +205,9 @@ static void dealloc_compressor_data(VP9_COMP *cpi) {
vp9_free_context_buffers(cm);
vp9_free_frame_buffer(&cpi->last_frame_uf);
#if CONFIG_LOOP_POSTFILTER
vp9_free_frame_buffer(&cpi->last_frame_db);
#endif // CONFIG_LOOP_POSTFILTER
vp9_free_frame_buffer(&cpi->scaled_source);
vp9_free_frame_buffer(&cpi->scaled_last_source);
vp9_free_frame_buffer(&cpi->alt_ref_buffer);
@ -492,6 +495,18 @@ static void alloc_util_frame_buffers(VP9_COMP *cpi) {
vpx_internal_error(&cm->error, VPX_CODEC_MEM_ERROR,
"Failed to allocate last frame buffer");
#if CONFIG_LOOP_POSTFILTER
if (vp9_realloc_frame_buffer(&cpi->last_frame_db,
cm->width, cm->height,
cm->subsampling_x, cm->subsampling_y,
#if CONFIG_VP9_HIGHBITDEPTH
cm->use_highbitdepth,
#endif
VP9_ENC_BORDER_IN_PIXELS, NULL, NULL, NULL))
vpx_internal_error(&cm->error, VPX_CODEC_MEM_ERROR,
"Failed to allocate last frame deblocked buffer");
#endif // CONFIG_LOOP_POSTFILTER
if (vp9_realloc_frame_buffer(&cpi->scaled_source,
cm->width, cm->height,
cm->subsampling_x, cm->subsampling_y,
@ -2670,9 +2685,8 @@ static void loopfilter_frame(VP9_COMP *cpi, VP9_COMMON *cm) {
#if CONFIG_LOOP_POSTFILTER
vp9_loop_bilateral_init(&cm->lf_info, cm->lf.bilateral_level,
cm->frame_type == KEY_FRAME);
if (cm->lf_info.bilateral_used) {
if (cm->lf_info.bilateral_used)
vp9_loop_bilateral_rows(cm->frame_to_show, cm, 0, cm->mi_rows, 0);
}
#endif // CONFIG_LOOP_POSTFILTER
vp9_extend_frame_inner_borders(cm->frame_to_show);

View File

@ -263,6 +263,9 @@ typedef struct VP9_COMP {
int ext_refresh_frame_context;
YV12_BUFFER_CONFIG last_frame_uf;
#if CONFIG_LOOP_POSTFILTER
YV12_BUFFER_CONFIG last_frame_db;
#endif // CONFIG_LOOP_POSTFILTER
TOKENEXTRA *tok;
unsigned int tok_count[4][1 << 6];

View File

@ -58,16 +58,16 @@ static int try_filter_frame(const YV12_BUFFER_CONFIG *sd, VP9_COMP *const cpi,
#if CONFIG_LOOP_POSTFILTER
#define JOINT_FILTER_BILATERAL_SEARCH
#define USE_RD_LOOP_POSTFILTER_SEARCH
static int try_bilateral_frame(const YV12_BUFFER_CONFIG *sd,
VP9_COMP *const cpi,
int filt_level,
int bilateral_level,
int partial_frame) {
VP9_COMMON *const cm = &cpi->common;
int filt_err;
vp9_loop_filter_gen_frame(cm->frame_to_show, cm, &cpi->mb.e_mbd, filt_level,
bilateral_level, 1, partial_frame);
vp9_loop_bilateral_frame(cm->frame_to_show, cm,
bilateral_level, 1, partial_frame);
#if CONFIG_VP9_HIGHBITDEPTH
if (cm->use_highbitdepth) {
filt_err = vp9_highbd_get_y_sse(sd, cm->frame_to_show, cm->bit_depth);
@ -79,36 +79,42 @@ static int try_bilateral_frame(const YV12_BUFFER_CONFIG *sd,
#endif // CONFIG_VP9_HIGHBITDEPTH
// Re-instate the unfiltered frame
vpx_yv12_copy_y(&cpi->last_frame_uf, cm->frame_to_show);
vpx_yv12_copy_y(&cpi->last_frame_db, cm->frame_to_show);
return filt_err;
}
// #define USE_RD_BILATERAL_SEARCH
static int64_t search_bilateral_level(const YV12_BUFFER_CONFIG *sd,
VP9_COMP *cpi,
int filter_level, int partial_frame,
int64_t *best_cost_ret) {
VP9_COMMON *const cm = &cpi->common;
int i, bilateral_best, err;
int64_t best_cost;
int64_t cost[BILATERAL_LEVELS_KF];
const int bilateral_level_bits = vp9_bilateral_level_bits(&cpi->common);
const int bilateral_levels = 1 << bilateral_level_bits;
#ifdef USE_RD_BILATERAL_SEARCH
#ifdef USE_RD_LOOP_POSTFILTER_SEARCH
MACROBLOCK *x = &cpi->mb;
#endif
// Make a copy of the unfiltered / processed recon buffer
vpx_yv12_copy_y(cm->frame_to_show, &cpi->last_frame_uf);
vp9_loop_filter_frame(cm->frame_to_show, cm, &cpi->mb.e_mbd, filter_level,
1, partial_frame);
vpx_yv12_copy_y(cm->frame_to_show, &cpi->last_frame_db);
bilateral_best = 0;
err = try_bilateral_frame(sd, cpi, filter_level, 0, partial_frame);
#ifdef USE_RD_BILATERAL_SEARCH
err = try_bilateral_frame(sd, cpi, 0, partial_frame);
#ifdef USE_RD_LOOP_POSTFILTER_SEARCH
cost[0] = RDCOST(x->rdmult, x->rddiv, 0, err);
#else
cost[0] = err;
#endif
best_cost = cost[0];
for (i = 1; i <= bilateral_levels; ++i) {
err = try_bilateral_frame(sd, cpi, filter_level, i, partial_frame);
#ifdef USE_RD_BILATERAL_SEARCH
err = try_bilateral_frame(sd, cpi, i, partial_frame);
#ifdef USE_RD_LOOP_POSTFILTER_SEARCH
// Normally the rate is rate in bits * 256 and dist is sum sq err * 64
// when RDCOST is used. However below we just scale both in the correct
// ratios appropriately but not exactly by these values.
@ -123,9 +129,11 @@ static int64_t search_bilateral_level(const YV12_BUFFER_CONFIG *sd,
}
}
if (best_cost_ret) *best_cost_ret = best_cost;
vpx_yv12_copy_y(&cpi->last_frame_uf, cm->frame_to_show);
return bilateral_best;
}
#ifdef JOINT_FILTER_BILATERAL_SEARCH
static int search_filter_bilateral_level(const YV12_BUFFER_CONFIG *sd,
VP9_COMP *cpi,
int partial_frame,
@ -149,9 +157,6 @@ static int search_filter_bilateral_level(const YV12_BUFFER_CONFIG *sd,
// Set each entry to -1
vpx_memset(ss_err, 0xFF, sizeof(ss_err));
// Make a copy of the unfiltered / processed recon buffer
vpx_yv12_copy_y(cm->frame_to_show, &cpi->last_frame_uf);
bilateral = search_bilateral_level(sd, cpi, filt_mid,
partial_frame, &best_err);
filt_best = filt_mid;
@ -218,6 +223,9 @@ static int search_filter_bilateral_level(const YV12_BUFFER_CONFIG *sd,
*bilateral_level = bilateral_best;
return filt_best;
}
#endif
// Returns used (1) or not used (0)
#endif // CONFIG_LOOP_POSTFILTER
static int search_filter_level(const YV12_BUFFER_CONFIG *sd, VP9_COMP *cpi,

View File

@ -26,6 +26,10 @@ extern "C" {
// Integer rate-distortion cost: a rounded rate term, (128 + R * RM) >> 8,
// plus a distortion term, D << DM. The rate product is computed in int64_t.
// Every argument is parenthesized so that expression arguments (e.g. `a | b`
// for D or DM) expand with the intended precedence.
#define RDCOST(RM, DM, R, D) \
  (((128 + ((int64_t)(R)) * (RM)) >> 8) + ((D) << (DM)))
// Floating-point rate-distortion cost: (R * RM) / 256.0 plus D * (1 << DM),
// evaluated in double. Every argument is parenthesized so that expression
// arguments (e.g. `a + b` for D) are cast and scaled as a whole rather than
// having the cast bind only to the first operand.
#define RDCOST_DBL(RM, DM, R, D) \
  (((((double)(R)) * (RM)) / 256.0) + ((double)(D) * (1 << (DM))))
#define QIDX_SKIP_THRESH 115
#define MV_COST_WEIGHT 108