Merge "Simplify bilateral filter search for speed" into nextgen
This commit is contained in:
commit
fb001c2e2f
@ -246,11 +246,11 @@ int vp9_loop_bilateral_used(int level, int kf) {
|
||||
}
|
||||
|
||||
void vp9_loop_bilateral_init(loop_filter_info_n *lfi, int level, int kf) {
|
||||
if (level != lfi->bilateral_level_set ||
|
||||
kf != lfi->bilateral_kf_set) {
|
||||
lfi->bilateral_used = vp9_loop_bilateral_used(level, kf);
|
||||
if (lfi->bilateral_used) {
|
||||
const bilateral_params_t param = vp9_bilateral_level_to_params(level, kf);
|
||||
const bilateral_params_t param = vp9_bilateral_level_to_params(level, kf);
|
||||
lfi->bilateral_used = vp9_loop_bilateral_used(level, kf);
|
||||
if (lfi->bilateral_used) {
|
||||
if (param.sigma_x != lfi->bilateral_sigma_x_set ||
|
||||
param.sigma_r != lfi->bilateral_sigma_r_set) {
|
||||
const int sigma_x = param.sigma_x;
|
||||
const int sigma_r = param.sigma_r;
|
||||
const double sigma_r_d = (double)sigma_r / BILATERAL_PRECISION;
|
||||
@ -267,9 +267,9 @@ void vp9_loop_bilateral_init(loop_filter_info_n *lfi, int level, int kf) {
|
||||
wx_lut_[y * BILATERAL_WIN + x] =
|
||||
exp(-(x * x + y * y) / (2 * sigma_x_d * sigma_x_d));
|
||||
}
|
||||
lfi->bilateral_sigma_x_set = sigma_x;
|
||||
lfi->bilateral_sigma_r_set = sigma_r;
|
||||
}
|
||||
lfi->bilateral_level_set = level;
|
||||
lfi->bilateral_kf_set = kf;
|
||||
}
|
||||
}
|
||||
|
||||
@ -1855,15 +1855,14 @@ void vp9_loop_filter_frame(YV12_BUFFER_CONFIG *frame,
|
||||
}
|
||||
|
||||
#if CONFIG_LOOP_POSTFILTER
|
||||
void vp9_loop_filter_gen_frame(YV12_BUFFER_CONFIG *frame,
|
||||
VP9_COMMON *cm, MACROBLOCKD *xd,
|
||||
int frame_filter_level,
|
||||
int bilateral_level,
|
||||
int y_only, int partial_frame) {
|
||||
void vp9_loop_bilateral_frame(YV12_BUFFER_CONFIG *frame,
|
||||
VP9_COMMON *cm,
|
||||
int bilateral_level,
|
||||
int y_only, int partial_frame) {
|
||||
int start_mi_row, end_mi_row, mi_rows_to_filter;
|
||||
const int loop_bilateral_used = vp9_loop_bilateral_used(
|
||||
bilateral_level, cm->frame_type == KEY_FRAME);
|
||||
if (!frame_filter_level && !loop_bilateral_used)
|
||||
if (!loop_bilateral_used)
|
||||
return;
|
||||
start_mi_row = 0;
|
||||
mi_rows_to_filter = cm->mi_rows;
|
||||
@ -1873,12 +1872,6 @@ void vp9_loop_filter_gen_frame(YV12_BUFFER_CONFIG *frame,
|
||||
mi_rows_to_filter = MAX(cm->mi_rows / 8, 8);
|
||||
}
|
||||
end_mi_row = start_mi_row + mi_rows_to_filter;
|
||||
if (frame_filter_level) {
|
||||
vp9_loop_filter_frame_init(cm, frame_filter_level);
|
||||
vp9_loop_filter_rows(frame, cm, xd->plane,
|
||||
start_mi_row, end_mi_row,
|
||||
y_only);
|
||||
}
|
||||
if (loop_bilateral_used) {
|
||||
vp9_loop_bilateral_init(&cm->lf_info, bilateral_level,
|
||||
cm->frame_type == KEY_FRAME);
|
||||
|
@ -50,7 +50,8 @@ typedef struct bilateral_params {
|
||||
static bilateral_params_t
|
||||
bilateral_level_to_params_arr[BILATERAL_LEVELS + 1] = {
|
||||
// Values are rounded to 1/8 th precision
|
||||
{4, 16}, // 0 - default
|
||||
{0, 0}, // 0 - default
|
||||
{4, 16},
|
||||
{5, 16},
|
||||
{6, 16},
|
||||
{7, 16},
|
||||
@ -58,13 +59,13 @@ static bilateral_params_t
|
||||
{12, 20},
|
||||
{16, 20},
|
||||
{20, 20},
|
||||
{24, 24}
|
||||
};
|
||||
|
||||
static bilateral_params_t
|
||||
bilateral_level_to_params_arr_kf[BILATERAL_LEVELS_KF + 1] = {
|
||||
// Values are rounded to 1/8 th precision
|
||||
{4, 16}, // 0 - default
|
||||
{0, 0}, // 0 - default
|
||||
{4, 16},
|
||||
{5, 16},
|
||||
{6, 16},
|
||||
{7, 16},
|
||||
@ -80,7 +81,6 @@ static bilateral_params_t
|
||||
{28, 32},
|
||||
{32, 24},
|
||||
{32, 28},
|
||||
{32, 32},
|
||||
};
|
||||
|
||||
int vp9_bilateral_level_bits(const struct VP9Common *const cm);
|
||||
@ -129,8 +129,8 @@ typedef struct {
|
||||
#if CONFIG_LOOP_POSTFILTER
|
||||
double wx_lut[BILATERAL_WIN * BILATERAL_WIN];
|
||||
double wr_lut[512];
|
||||
int bilateral_level_set;
|
||||
int bilateral_kf_set;
|
||||
int bilateral_sigma_x_set;
|
||||
int bilateral_sigma_r_set;
|
||||
int bilateral_used;
|
||||
#endif
|
||||
} loop_filter_info_n;
|
||||
@ -190,18 +190,22 @@ void vp9_loop_filter_rows(YV12_BUFFER_CONFIG *frame_buffer,
|
||||
struct macroblockd_plane planes[MAX_MB_PLANE],
|
||||
int start, int stop, int y_only);
|
||||
#if CONFIG_LOOP_POSTFILTER
|
||||
void vp9_loop_filter_gen_frame(YV12_BUFFER_CONFIG *frame,
|
||||
struct VP9Common *cm,
|
||||
struct macroblockd *mbd,
|
||||
int frame_filter_level,
|
||||
int bilateral_level,
|
||||
int y_only, int partial_frame);
|
||||
void vp9_loop_bilateral_frame(YV12_BUFFER_CONFIG *frame,
|
||||
struct VP9Common *cm,
|
||||
int bilateral_level,
|
||||
int y_only, int partial_frame);
|
||||
void vp9_loop_filter_bilateral_frame(YV12_BUFFER_CONFIG *frame,
|
||||
struct VP9Common *cm,
|
||||
struct macroblockd *mbd,
|
||||
int frame_filter_level,
|
||||
int bilateral_level,
|
||||
int y_only, int partial_frame);
|
||||
void vp9_loop_bilateral_init(loop_filter_info_n *lfi, int T, int kf);
|
||||
void vp9_loop_bilateral_rows(YV12_BUFFER_CONFIG *frame,
|
||||
struct VP9Common *cm,
|
||||
int start_mi_row, int end_mi_row,
|
||||
int y_only);
|
||||
#endif
|
||||
#endif // CONFIG_LOOP_POSTFILTER
|
||||
|
||||
|
||||
typedef struct LoopFilterWorkerData {
|
||||
|
@ -1584,7 +1584,7 @@ static void setup_loopfilter(VP9_COMMON *cm,
|
||||
lf->bilateral_level += vp9_rb_read_literal(
|
||||
rb, vp9_bilateral_level_bits(cm));
|
||||
}
|
||||
#endif
|
||||
#endif // CONFIG_LOOP_POSTFILTER
|
||||
}
|
||||
|
||||
static int read_delta_q(struct vp9_read_bit_buffer *rb, int *delta_q) {
|
||||
|
@ -1531,7 +1531,7 @@ static void encode_loopfilter(VP9_COMMON *cm,
|
||||
if (lf->bilateral_level > 0)
|
||||
vp9_wb_write_literal(wb, lf->bilateral_level - 1,
|
||||
vp9_bilateral_level_bits(cm));
|
||||
#endif
|
||||
#endif // CONFIG_LOOP_POSTFILTER
|
||||
}
|
||||
|
||||
static void write_delta_q(struct vp9_write_bit_buffer *wb, int delta_q) {
|
||||
|
@ -205,6 +205,9 @@ static void dealloc_compressor_data(VP9_COMP *cpi) {
|
||||
vp9_free_context_buffers(cm);
|
||||
|
||||
vp9_free_frame_buffer(&cpi->last_frame_uf);
|
||||
#if CONFIG_LOOP_POSTFILTER
|
||||
vp9_free_frame_buffer(&cpi->last_frame_db);
|
||||
#endif // CONFIG_LOOP_POSTFILTER
|
||||
vp9_free_frame_buffer(&cpi->scaled_source);
|
||||
vp9_free_frame_buffer(&cpi->scaled_last_source);
|
||||
vp9_free_frame_buffer(&cpi->alt_ref_buffer);
|
||||
@ -492,6 +495,18 @@ static void alloc_util_frame_buffers(VP9_COMP *cpi) {
|
||||
vpx_internal_error(&cm->error, VPX_CODEC_MEM_ERROR,
|
||||
"Failed to allocate last frame buffer");
|
||||
|
||||
#if CONFIG_LOOP_POSTFILTER
|
||||
if (vp9_realloc_frame_buffer(&cpi->last_frame_db,
|
||||
cm->width, cm->height,
|
||||
cm->subsampling_x, cm->subsampling_y,
|
||||
#if CONFIG_VP9_HIGHBITDEPTH
|
||||
cm->use_highbitdepth,
|
||||
#endif
|
||||
VP9_ENC_BORDER_IN_PIXELS, NULL, NULL, NULL))
|
||||
vpx_internal_error(&cm->error, VPX_CODEC_MEM_ERROR,
|
||||
"Failed to allocate last frame deblocked buffer");
|
||||
#endif // CONFIG_LOOP_POSTFILTER
|
||||
|
||||
if (vp9_realloc_frame_buffer(&cpi->scaled_source,
|
||||
cm->width, cm->height,
|
||||
cm->subsampling_x, cm->subsampling_y,
|
||||
@ -2670,9 +2685,8 @@ static void loopfilter_frame(VP9_COMP *cpi, VP9_COMMON *cm) {
|
||||
#if CONFIG_LOOP_POSTFILTER
|
||||
vp9_loop_bilateral_init(&cm->lf_info, cm->lf.bilateral_level,
|
||||
cm->frame_type == KEY_FRAME);
|
||||
if (cm->lf_info.bilateral_used) {
|
||||
if (cm->lf_info.bilateral_used)
|
||||
vp9_loop_bilateral_rows(cm->frame_to_show, cm, 0, cm->mi_rows, 0);
|
||||
}
|
||||
#endif // CONFIG_LOOP_POSTFILTER
|
||||
|
||||
vp9_extend_frame_inner_borders(cm->frame_to_show);
|
||||
|
@ -263,6 +263,9 @@ typedef struct VP9_COMP {
|
||||
int ext_refresh_frame_context;
|
||||
|
||||
YV12_BUFFER_CONFIG last_frame_uf;
|
||||
#if CONFIG_LOOP_POSTFILTER
|
||||
YV12_BUFFER_CONFIG last_frame_db;
|
||||
#endif // CONFIG_LOOP_POSTFILTER
|
||||
|
||||
TOKENEXTRA *tok;
|
||||
unsigned int tok_count[4][1 << 6];
|
||||
|
@ -58,16 +58,16 @@ static int try_filter_frame(const YV12_BUFFER_CONFIG *sd, VP9_COMP *const cpi,
|
||||
|
||||
#if CONFIG_LOOP_POSTFILTER
|
||||
#define JOINT_FILTER_BILATERAL_SEARCH
|
||||
#define USE_RD_LOOP_POSTFILTER_SEARCH
|
||||
|
||||
static int try_bilateral_frame(const YV12_BUFFER_CONFIG *sd,
|
||||
VP9_COMP *const cpi,
|
||||
int filt_level,
|
||||
int bilateral_level,
|
||||
int partial_frame) {
|
||||
VP9_COMMON *const cm = &cpi->common;
|
||||
int filt_err;
|
||||
|
||||
vp9_loop_filter_gen_frame(cm->frame_to_show, cm, &cpi->mb.e_mbd, filt_level,
|
||||
bilateral_level, 1, partial_frame);
|
||||
vp9_loop_bilateral_frame(cm->frame_to_show, cm,
|
||||
bilateral_level, 1, partial_frame);
|
||||
#if CONFIG_VP9_HIGHBITDEPTH
|
||||
if (cm->use_highbitdepth) {
|
||||
filt_err = vp9_highbd_get_y_sse(sd, cm->frame_to_show, cm->bit_depth);
|
||||
@ -79,36 +79,42 @@ static int try_bilateral_frame(const YV12_BUFFER_CONFIG *sd,
|
||||
#endif // CONFIG_VP9_HIGHBITDEPTH
|
||||
|
||||
// Re-instate the unfiltered frame
|
||||
vpx_yv12_copy_y(&cpi->last_frame_uf, cm->frame_to_show);
|
||||
vpx_yv12_copy_y(&cpi->last_frame_db, cm->frame_to_show);
|
||||
|
||||
return filt_err;
|
||||
}
|
||||
|
||||
// #define USE_RD_BILATERAL_SEARCH
|
||||
static int64_t search_bilateral_level(const YV12_BUFFER_CONFIG *sd,
|
||||
VP9_COMP *cpi,
|
||||
int filter_level, int partial_frame,
|
||||
int64_t *best_cost_ret) {
|
||||
VP9_COMMON *const cm = &cpi->common;
|
||||
int i, bilateral_best, err;
|
||||
int64_t best_cost;
|
||||
int64_t cost[BILATERAL_LEVELS_KF];
|
||||
const int bilateral_level_bits = vp9_bilateral_level_bits(&cpi->common);
|
||||
const int bilateral_levels = 1 << bilateral_level_bits;
|
||||
#ifdef USE_RD_BILATERAL_SEARCH
|
||||
#ifdef USE_RD_LOOP_POSTFILTER_SEARCH
|
||||
MACROBLOCK *x = &cpi->mb;
|
||||
#endif
|
||||
|
||||
// Make a copy of the unfiltered / processed recon buffer
|
||||
vpx_yv12_copy_y(cm->frame_to_show, &cpi->last_frame_uf);
|
||||
vp9_loop_filter_frame(cm->frame_to_show, cm, &cpi->mb.e_mbd, filter_level,
|
||||
1, partial_frame);
|
||||
vpx_yv12_copy_y(cm->frame_to_show, &cpi->last_frame_db);
|
||||
|
||||
bilateral_best = 0;
|
||||
err = try_bilateral_frame(sd, cpi, filter_level, 0, partial_frame);
|
||||
#ifdef USE_RD_BILATERAL_SEARCH
|
||||
err = try_bilateral_frame(sd, cpi, 0, partial_frame);
|
||||
#ifdef USE_RD_LOOP_POSTFILTER_SEARCH
|
||||
cost[0] = RDCOST(x->rdmult, x->rddiv, 0, err);
|
||||
#else
|
||||
cost[0] = err;
|
||||
#endif
|
||||
best_cost = cost[0];
|
||||
for (i = 1; i <= bilateral_levels; ++i) {
|
||||
err = try_bilateral_frame(sd, cpi, filter_level, i, partial_frame);
|
||||
#ifdef USE_RD_BILATERAL_SEARCH
|
||||
err = try_bilateral_frame(sd, cpi, i, partial_frame);
|
||||
#ifdef USE_RD_LOOP_POSTFILTER_SEARCH
|
||||
// Normally the rate is rate in bits * 256 and dist is sum sq err * 64
|
||||
// when RDCOST is used. However below we just scale both in the correct
|
||||
// ratios appropriately but not exactly by these values.
|
||||
@ -123,9 +129,11 @@ static int64_t search_bilateral_level(const YV12_BUFFER_CONFIG *sd,
|
||||
}
|
||||
}
|
||||
if (best_cost_ret) *best_cost_ret = best_cost;
|
||||
vpx_yv12_copy_y(&cpi->last_frame_uf, cm->frame_to_show);
|
||||
return bilateral_best;
|
||||
}
|
||||
|
||||
#ifdef JOINT_FILTER_BILATERAL_SEARCH
|
||||
static int search_filter_bilateral_level(const YV12_BUFFER_CONFIG *sd,
|
||||
VP9_COMP *cpi,
|
||||
int partial_frame,
|
||||
@ -149,9 +157,6 @@ static int search_filter_bilateral_level(const YV12_BUFFER_CONFIG *sd,
|
||||
// Set each entry to -1
|
||||
vpx_memset(ss_err, 0xFF, sizeof(ss_err));
|
||||
|
||||
// Make a copy of the unfiltered / processed recon buffer
|
||||
vpx_yv12_copy_y(cm->frame_to_show, &cpi->last_frame_uf);
|
||||
|
||||
bilateral = search_bilateral_level(sd, cpi, filt_mid,
|
||||
partial_frame, &best_err);
|
||||
filt_best = filt_mid;
|
||||
@ -218,6 +223,9 @@ static int search_filter_bilateral_level(const YV12_BUFFER_CONFIG *sd,
|
||||
*bilateral_level = bilateral_best;
|
||||
return filt_best;
|
||||
}
|
||||
#endif
|
||||
|
||||
// Returns used (1) or not used (0)
|
||||
#endif // CONFIG_LOOP_POSTFILTER
|
||||
|
||||
static int search_filter_level(const YV12_BUFFER_CONFIG *sd, VP9_COMP *cpi,
|
||||
|
@ -26,6 +26,10 @@ extern "C" {
|
||||
|
||||
/*
 * Integer rate-distortion cost.
 *
 * Computes ((128 + R * RM) >> 8) + (D << DM): the rate R multiplied by RM,
 * rounded (the +128) and divided by 256, plus the distortion D shifted left
 * by DM bits. The rate product is evaluated in int64_t.
 *
 * Every macro parameter is parenthesized so that expression arguments
 * (for example `a | b` or `cond ? x : y`) are evaluated as a unit instead
 * of being regrouped by operator precedence after expansion.
 */
#define RDCOST(RM, DM, R, D) \
  (((128 + ((int64_t)(R)) * (RM)) >> 8) + ((D) << (DM)))
|
||||
|
||||
/*
 * Floating-point counterpart of RDCOST.
 *
 * Computes (R * RM) / 256.0 + D * (1 << DM) in double precision, with no
 * rounding term.
 *
 * Every macro parameter is parenthesized so that the (double) casts apply to
 * the whole argument expression rather than only to its first operand, and
 * so that DM is evaluated as a unit inside the shift.
 */
#define RDCOST_DBL(RM, DM, R, D) \
  (((((double)(R)) * (RM)) / 256.0) + ((double)(D) * (1 << (DM))))
|
||||
|
||||
#define QIDX_SKIP_THRESH 115
|
||||
|
||||
#define MV_COST_WEIGHT 108
|
||||
|
Loading…
x
Reference in New Issue
Block a user