Simplify bilateral filter search for speed
Adds an internal buffer in the encoder to store the deblocked result, which speeds up the search for the best bilateral filter. The change in coding performance is very small, but the search is a lot faster: derflr: +0.518%
Change-Id: I5d37e016088e559c16317789cfb1c2f49334b2b9
This commit is contained in:
parent
294159d41e
commit
017baf9f4b
@ -246,11 +246,11 @@ int vp9_loop_bilateral_used(int level, int kf) {
|
||||
}
|
||||
|
||||
void vp9_loop_bilateral_init(loop_filter_info_n *lfi, int level, int kf) {
|
||||
if (level != lfi->bilateral_level_set ||
|
||||
kf != lfi->bilateral_kf_set) {
|
||||
lfi->bilateral_used = vp9_loop_bilateral_used(level, kf);
|
||||
if (lfi->bilateral_used) {
|
||||
const bilateral_params_t param = vp9_bilateral_level_to_params(level, kf);
|
||||
const bilateral_params_t param = vp9_bilateral_level_to_params(level, kf);
|
||||
lfi->bilateral_used = vp9_loop_bilateral_used(level, kf);
|
||||
if (lfi->bilateral_used) {
|
||||
if (param.sigma_x != lfi->bilateral_sigma_x_set ||
|
||||
param.sigma_r != lfi->bilateral_sigma_r_set) {
|
||||
const int sigma_x = param.sigma_x;
|
||||
const int sigma_r = param.sigma_r;
|
||||
const double sigma_r_d = (double)sigma_r / BILATERAL_PRECISION;
|
||||
@ -267,9 +267,9 @@ void vp9_loop_bilateral_init(loop_filter_info_n *lfi, int level, int kf) {
|
||||
wx_lut_[y * BILATERAL_WIN + x] =
|
||||
exp(-(x * x + y * y) / (2 * sigma_x_d * sigma_x_d));
|
||||
}
|
||||
lfi->bilateral_sigma_x_set = sigma_x;
|
||||
lfi->bilateral_sigma_r_set = sigma_r;
|
||||
}
|
||||
lfi->bilateral_level_set = level;
|
||||
lfi->bilateral_kf_set = kf;
|
||||
}
|
||||
}
|
||||
|
||||
@ -1855,15 +1855,14 @@ void vp9_loop_filter_frame(YV12_BUFFER_CONFIG *frame,
|
||||
}
|
||||
|
||||
#if CONFIG_LOOP_POSTFILTER
|
||||
void vp9_loop_filter_gen_frame(YV12_BUFFER_CONFIG *frame,
|
||||
VP9_COMMON *cm, MACROBLOCKD *xd,
|
||||
int frame_filter_level,
|
||||
int bilateral_level,
|
||||
int y_only, int partial_frame) {
|
||||
void vp9_loop_bilateral_frame(YV12_BUFFER_CONFIG *frame,
|
||||
VP9_COMMON *cm,
|
||||
int bilateral_level,
|
||||
int y_only, int partial_frame) {
|
||||
int start_mi_row, end_mi_row, mi_rows_to_filter;
|
||||
const int loop_bilateral_used = vp9_loop_bilateral_used(
|
||||
bilateral_level, cm->frame_type == KEY_FRAME);
|
||||
if (!frame_filter_level && !loop_bilateral_used)
|
||||
if (!loop_bilateral_used)
|
||||
return;
|
||||
start_mi_row = 0;
|
||||
mi_rows_to_filter = cm->mi_rows;
|
||||
@ -1873,12 +1872,6 @@ void vp9_loop_filter_gen_frame(YV12_BUFFER_CONFIG *frame,
|
||||
mi_rows_to_filter = MAX(cm->mi_rows / 8, 8);
|
||||
}
|
||||
end_mi_row = start_mi_row + mi_rows_to_filter;
|
||||
if (frame_filter_level) {
|
||||
vp9_loop_filter_frame_init(cm, frame_filter_level);
|
||||
vp9_loop_filter_rows(frame, cm, xd->plane,
|
||||
start_mi_row, end_mi_row,
|
||||
y_only);
|
||||
}
|
||||
if (loop_bilateral_used) {
|
||||
vp9_loop_bilateral_init(&cm->lf_info, bilateral_level,
|
||||
cm->frame_type == KEY_FRAME);
|
||||
|
@ -50,7 +50,8 @@ typedef struct bilateral_params {
|
||||
static bilateral_params_t
|
||||
bilateral_level_to_params_arr[BILATERAL_LEVELS + 1] = {
|
||||
// Values are rounded to 1/8 th precision
|
||||
{4, 16}, // 0 - default
|
||||
{0, 0}, // 0 - default
|
||||
{4, 16},
|
||||
{5, 16},
|
||||
{6, 16},
|
||||
{7, 16},
|
||||
@ -58,13 +59,13 @@ static bilateral_params_t
|
||||
{12, 20},
|
||||
{16, 20},
|
||||
{20, 20},
|
||||
{24, 24}
|
||||
};
|
||||
|
||||
static bilateral_params_t
|
||||
bilateral_level_to_params_arr_kf[BILATERAL_LEVELS_KF + 1] = {
|
||||
// Values are rounded to 1/8 th precision
|
||||
{4, 16}, // 0 - default
|
||||
{0, 0}, // 0 - default
|
||||
{4, 16},
|
||||
{5, 16},
|
||||
{6, 16},
|
||||
{7, 16},
|
||||
@ -80,7 +81,6 @@ static bilateral_params_t
|
||||
{28, 32},
|
||||
{32, 24},
|
||||
{32, 28},
|
||||
{32, 32},
|
||||
};
|
||||
|
||||
int vp9_bilateral_level_bits(const struct VP9Common *const cm);
|
||||
@ -129,8 +129,8 @@ typedef struct {
|
||||
#if CONFIG_LOOP_POSTFILTER
|
||||
double wx_lut[BILATERAL_WIN * BILATERAL_WIN];
|
||||
double wr_lut[512];
|
||||
int bilateral_level_set;
|
||||
int bilateral_kf_set;
|
||||
int bilateral_sigma_x_set;
|
||||
int bilateral_sigma_r_set;
|
||||
int bilateral_used;
|
||||
#endif
|
||||
} loop_filter_info_n;
|
||||
@ -190,18 +190,22 @@ void vp9_loop_filter_rows(YV12_BUFFER_CONFIG *frame_buffer,
|
||||
struct macroblockd_plane planes[MAX_MB_PLANE],
|
||||
int start, int stop, int y_only);
|
||||
#if CONFIG_LOOP_POSTFILTER
|
||||
void vp9_loop_filter_gen_frame(YV12_BUFFER_CONFIG *frame,
|
||||
struct VP9Common *cm,
|
||||
struct macroblockd *mbd,
|
||||
int frame_filter_level,
|
||||
int bilateral_level,
|
||||
int y_only, int partial_frame);
|
||||
void vp9_loop_bilateral_frame(YV12_BUFFER_CONFIG *frame,
|
||||
struct VP9Common *cm,
|
||||
int bilateral_level,
|
||||
int y_only, int partial_frame);
|
||||
void vp9_loop_filter_bilateral_frame(YV12_BUFFER_CONFIG *frame,
|
||||
struct VP9Common *cm,
|
||||
struct macroblockd *mbd,
|
||||
int frame_filter_level,
|
||||
int bilateral_level,
|
||||
int y_only, int partial_frame);
|
||||
void vp9_loop_bilateral_init(loop_filter_info_n *lfi, int T, int kf);
|
||||
void vp9_loop_bilateral_rows(YV12_BUFFER_CONFIG *frame,
|
||||
struct VP9Common *cm,
|
||||
int start_mi_row, int end_mi_row,
|
||||
int y_only);
|
||||
#endif
|
||||
#endif // CONFIG_LOOP_POSTFILTER
|
||||
|
||||
|
||||
typedef struct LoopFilterWorkerData {
|
||||
|
@ -1584,7 +1584,7 @@ static void setup_loopfilter(VP9_COMMON *cm,
|
||||
lf->bilateral_level += vp9_rb_read_literal(
|
||||
rb, vp9_bilateral_level_bits(cm));
|
||||
}
|
||||
#endif
|
||||
#endif // CONFIG_LOOP_POSTFILTER
|
||||
}
|
||||
|
||||
static int read_delta_q(struct vp9_read_bit_buffer *rb, int *delta_q) {
|
||||
|
@ -1531,7 +1531,7 @@ static void encode_loopfilter(VP9_COMMON *cm,
|
||||
if (lf->bilateral_level > 0)
|
||||
vp9_wb_write_literal(wb, lf->bilateral_level - 1,
|
||||
vp9_bilateral_level_bits(cm));
|
||||
#endif
|
||||
#endif // CONFIG_LOOP_POSTFILTER
|
||||
}
|
||||
|
||||
static void write_delta_q(struct vp9_write_bit_buffer *wb, int delta_q) {
|
||||
|
@ -205,6 +205,9 @@ static void dealloc_compressor_data(VP9_COMP *cpi) {
|
||||
vp9_free_context_buffers(cm);
|
||||
|
||||
vp9_free_frame_buffer(&cpi->last_frame_uf);
|
||||
#if CONFIG_LOOP_POSTFILTER
|
||||
vp9_free_frame_buffer(&cpi->last_frame_db);
|
||||
#endif // CONFIG_LOOP_POSTFILTER
|
||||
vp9_free_frame_buffer(&cpi->scaled_source);
|
||||
vp9_free_frame_buffer(&cpi->scaled_last_source);
|
||||
vp9_free_frame_buffer(&cpi->alt_ref_buffer);
|
||||
@ -492,6 +495,18 @@ static void alloc_util_frame_buffers(VP9_COMP *cpi) {
|
||||
vpx_internal_error(&cm->error, VPX_CODEC_MEM_ERROR,
|
||||
"Failed to allocate last frame buffer");
|
||||
|
||||
#if CONFIG_LOOP_POSTFILTER
|
||||
if (vp9_realloc_frame_buffer(&cpi->last_frame_db,
|
||||
cm->width, cm->height,
|
||||
cm->subsampling_x, cm->subsampling_y,
|
||||
#if CONFIG_VP9_HIGHBITDEPTH
|
||||
cm->use_highbitdepth,
|
||||
#endif
|
||||
VP9_ENC_BORDER_IN_PIXELS, NULL, NULL, NULL))
|
||||
vpx_internal_error(&cm->error, VPX_CODEC_MEM_ERROR,
|
||||
"Failed to allocate last frame deblocked buffer");
|
||||
#endif // CONFIG_LOOP_POSTFILTER
|
||||
|
||||
if (vp9_realloc_frame_buffer(&cpi->scaled_source,
|
||||
cm->width, cm->height,
|
||||
cm->subsampling_x, cm->subsampling_y,
|
||||
@ -2670,9 +2685,8 @@ static void loopfilter_frame(VP9_COMP *cpi, VP9_COMMON *cm) {
|
||||
#if CONFIG_LOOP_POSTFILTER
|
||||
vp9_loop_bilateral_init(&cm->lf_info, cm->lf.bilateral_level,
|
||||
cm->frame_type == KEY_FRAME);
|
||||
if (cm->lf_info.bilateral_used) {
|
||||
if (cm->lf_info.bilateral_used)
|
||||
vp9_loop_bilateral_rows(cm->frame_to_show, cm, 0, cm->mi_rows, 0);
|
||||
}
|
||||
#endif // CONFIG_LOOP_POSTFILTER
|
||||
|
||||
vp9_extend_frame_inner_borders(cm->frame_to_show);
|
||||
|
@ -263,6 +263,9 @@ typedef struct VP9_COMP {
|
||||
int ext_refresh_frame_context;
|
||||
|
||||
YV12_BUFFER_CONFIG last_frame_uf;
|
||||
#if CONFIG_LOOP_POSTFILTER
|
||||
YV12_BUFFER_CONFIG last_frame_db;
|
||||
#endif // CONFIG_LOOP_POSTFILTER
|
||||
|
||||
TOKENEXTRA *tok;
|
||||
unsigned int tok_count[4][1 << 6];
|
||||
|
@ -58,16 +58,16 @@ static int try_filter_frame(const YV12_BUFFER_CONFIG *sd, VP9_COMP *const cpi,
|
||||
|
||||
#if CONFIG_LOOP_POSTFILTER
|
||||
#define JOINT_FILTER_BILATERAL_SEARCH
|
||||
#define USE_RD_LOOP_POSTFILTER_SEARCH
|
||||
|
||||
static int try_bilateral_frame(const YV12_BUFFER_CONFIG *sd,
|
||||
VP9_COMP *const cpi,
|
||||
int filt_level,
|
||||
int bilateral_level,
|
||||
int partial_frame) {
|
||||
VP9_COMMON *const cm = &cpi->common;
|
||||
int filt_err;
|
||||
|
||||
vp9_loop_filter_gen_frame(cm->frame_to_show, cm, &cpi->mb.e_mbd, filt_level,
|
||||
bilateral_level, 1, partial_frame);
|
||||
vp9_loop_bilateral_frame(cm->frame_to_show, cm,
|
||||
bilateral_level, 1, partial_frame);
|
||||
#if CONFIG_VP9_HIGHBITDEPTH
|
||||
if (cm->use_highbitdepth) {
|
||||
filt_err = vp9_highbd_get_y_sse(sd, cm->frame_to_show, cm->bit_depth);
|
||||
@ -79,36 +79,42 @@ static int try_bilateral_frame(const YV12_BUFFER_CONFIG *sd,
|
||||
#endif // CONFIG_VP9_HIGHBITDEPTH
|
||||
|
||||
// Re-instate the unfiltered frame
|
||||
vpx_yv12_copy_y(&cpi->last_frame_uf, cm->frame_to_show);
|
||||
vpx_yv12_copy_y(&cpi->last_frame_db, cm->frame_to_show);
|
||||
|
||||
return filt_err;
|
||||
}
|
||||
|
||||
// #define USE_RD_BILATERAL_SEARCH
|
||||
static int64_t search_bilateral_level(const YV12_BUFFER_CONFIG *sd,
|
||||
VP9_COMP *cpi,
|
||||
int filter_level, int partial_frame,
|
||||
int64_t *best_cost_ret) {
|
||||
VP9_COMMON *const cm = &cpi->common;
|
||||
int i, bilateral_best, err;
|
||||
int64_t best_cost;
|
||||
int64_t cost[BILATERAL_LEVELS_KF];
|
||||
const int bilateral_level_bits = vp9_bilateral_level_bits(&cpi->common);
|
||||
const int bilateral_levels = 1 << bilateral_level_bits;
|
||||
#ifdef USE_RD_BILATERAL_SEARCH
|
||||
#ifdef USE_RD_LOOP_POSTFILTER_SEARCH
|
||||
MACROBLOCK *x = &cpi->mb;
|
||||
#endif
|
||||
|
||||
// Make a copy of the unfiltered / processed recon buffer
|
||||
vpx_yv12_copy_y(cm->frame_to_show, &cpi->last_frame_uf);
|
||||
vp9_loop_filter_frame(cm->frame_to_show, cm, &cpi->mb.e_mbd, filter_level,
|
||||
1, partial_frame);
|
||||
vpx_yv12_copy_y(cm->frame_to_show, &cpi->last_frame_db);
|
||||
|
||||
bilateral_best = 0;
|
||||
err = try_bilateral_frame(sd, cpi, filter_level, 0, partial_frame);
|
||||
#ifdef USE_RD_BILATERAL_SEARCH
|
||||
err = try_bilateral_frame(sd, cpi, 0, partial_frame);
|
||||
#ifdef USE_RD_LOOP_POSTFILTER_SEARCH
|
||||
cost[0] = RDCOST(x->rdmult, x->rddiv, 0, err);
|
||||
#else
|
||||
cost[0] = err;
|
||||
#endif
|
||||
best_cost = cost[0];
|
||||
for (i = 1; i <= bilateral_levels; ++i) {
|
||||
err = try_bilateral_frame(sd, cpi, filter_level, i, partial_frame);
|
||||
#ifdef USE_RD_BILATERAL_SEARCH
|
||||
err = try_bilateral_frame(sd, cpi, i, partial_frame);
|
||||
#ifdef USE_RD_LOOP_POSTFILTER_SEARCH
|
||||
// Normally the rate is rate in bits * 256 and dist is sum sq err * 64
|
||||
// when RDCOST is used. However below we just scale both in the correct
|
||||
// ratios appropriately but not exactly by these values.
|
||||
@ -123,9 +129,11 @@ static int64_t search_bilateral_level(const YV12_BUFFER_CONFIG *sd,
|
||||
}
|
||||
}
|
||||
if (best_cost_ret) *best_cost_ret = best_cost;
|
||||
vpx_yv12_copy_y(&cpi->last_frame_uf, cm->frame_to_show);
|
||||
return bilateral_best;
|
||||
}
|
||||
|
||||
#ifdef JOINT_FILTER_BILATERAL_SEARCH
|
||||
static int search_filter_bilateral_level(const YV12_BUFFER_CONFIG *sd,
|
||||
VP9_COMP *cpi,
|
||||
int partial_frame,
|
||||
@ -149,9 +157,6 @@ static int search_filter_bilateral_level(const YV12_BUFFER_CONFIG *sd,
|
||||
// Set each entry to -1
|
||||
vpx_memset(ss_err, 0xFF, sizeof(ss_err));
|
||||
|
||||
// Make a copy of the unfiltered / processed recon buffer
|
||||
vpx_yv12_copy_y(cm->frame_to_show, &cpi->last_frame_uf);
|
||||
|
||||
bilateral = search_bilateral_level(sd, cpi, filt_mid,
|
||||
partial_frame, &best_err);
|
||||
filt_best = filt_mid;
|
||||
@ -218,6 +223,9 @@ static int search_filter_bilateral_level(const YV12_BUFFER_CONFIG *sd,
|
||||
*bilateral_level = bilateral_best;
|
||||
return filt_best;
|
||||
}
|
||||
#endif
|
||||
|
||||
// Returns used (1) or not used (0)
|
||||
#endif // CONFIG_LOOP_POSTFILTER
|
||||
|
||||
static int search_filter_level(const YV12_BUFFER_CONFIG *sd, VP9_COMP *cpi,
|
||||
|
@ -26,6 +26,10 @@ extern "C" {
|
||||
|
||||
#define RDCOST(RM, DM, R, D) \
|
||||
(((128 + ((int64_t)R) * (RM)) >> 8) + (D << DM))
|
||||
|
||||
#define RDCOST_DBL(RM, DM, R, D) \
|
||||
(((((double)R) * (RM)) / 256.0) + ((double)D * (1 << DM)))
|
||||
|
||||
#define QIDX_SKIP_THRESH 115
|
||||
|
||||
#define MV_COST_WEIGHT 108
|
||||
|
Loading…
x
Reference in New Issue
Block a user