diff --git a/vp9/common/vp9_blockd.h b/vp9/common/vp9_blockd.h index 37d29af17..2d417975a 100644 --- a/vp9/common/vp9_blockd.h +++ b/vp9/common/vp9_blockd.h @@ -237,6 +237,11 @@ typedef struct { union b_mode_info bmi[4]; } MODE_INFO; +enum mv_precision { + MV_PRECISION_Q3, + MV_PRECISION_Q4 +}; + #define VP9_REF_SCALE_SHIFT 14 struct scale_factors { int x_scale_fp; // horizontal fixed point scale factor @@ -249,9 +254,8 @@ struct scale_factors { int (*scale_value_x)(int val, const struct scale_factors *scale); int (*scale_value_y)(int val, const struct scale_factors *scale); void (*set_scaled_offsets)(struct scale_factors *scale, int row, int col); - int_mv32 (*scale_mv_q3_to_q4)(const int_mv *src_mv, - const struct scale_factors *scale); - int32_t (*scale_mv_component_q4)(int mv_q4, int scale_fp, int offset_q4); + MV32 (*scale_mv_q3_to_q4)(const MV *mv, const struct scale_factors *scale); + MV32 (*scale_mv_q4)(const MV *mv, const struct scale_factors *scale); convolve_fn_t predict[2][2][2]; // horiz, vert, avg }; diff --git a/vp9/common/vp9_mv.h b/vp9/common/vp9_mv.h index a1eef4649..b4dc355b2 100644 --- a/vp9/common/vp9_mv.h +++ b/vp9/common/vp9_mv.h @@ -23,14 +23,14 @@ typedef union int_mv { MV as_mv; } int_mv; /* facilitates faster equality tests and copies */ -struct mv32 { +typedef struct { int32_t row; int32_t col; -}; +} MV32; typedef union int_mv32 { - uint64_t as_int; - struct mv32 as_mv; + uint64_t as_int; + MV32 as_mv; } int_mv32; /* facilitates faster equality tests and copies */ #endif // VP9_COMMON_VP9_MV_H_ diff --git a/vp9/common/vp9_reconinter.c b/vp9/common/vp9_reconinter.c index b28d33319..f1d155819 100644 --- a/vp9/common/vp9_reconinter.c +++ b/vp9/common/vp9_reconinter.c @@ -32,45 +32,42 @@ static int unscaled_value(int val, const struct scale_factors *scale) { return val; } -static int_mv32 mv_q3_to_q4_with_scaling(const int_mv *src_mv, - const struct scale_factors *scale) { - // returns mv * scale + offset - int_mv32 result; - const int32_t mv_row_q4 = src_mv->as_mv.row << 1; - const int32_t mv_col_q4 = src_mv->as_mv.col << 1; - - result.as_mv.row = (mv_row_q4 * scale->y_scale_fp >> VP9_REF_SCALE_SHIFT) - + scale->y_offset_q4; - result.as_mv.col = (mv_col_q4 * scale->x_scale_fp >> VP9_REF_SCALE_SHIFT) - + scale->x_offset_q4; - return result; +static MV32 mv_q3_to_q4_with_scaling(const MV *mv, + const struct scale_factors *scale) { + const MV32 res = { + ((mv->row << 1) * scale->y_scale_fp >> VP9_REF_SCALE_SHIFT) + + scale->y_offset_q4, + ((mv->col << 1) * scale->x_scale_fp >> VP9_REF_SCALE_SHIFT) + + scale->x_offset_q4 + }; + return res; } -static int_mv32 mv_q3_to_q4_without_scaling(const int_mv *src_mv, - const struct scale_factors *scale) { - // returns mv * scale + offset - int_mv32 result; - - result.as_mv.row = src_mv->as_mv.row << 1; - result.as_mv.col = src_mv->as_mv.col << 1; - return result; +static MV32 mv_q3_to_q4_without_scaling(const MV *mv, + const struct scale_factors *scale) { + const MV32 res = { + mv->row << 1, + mv->col << 1 + }; + return res; } -static int32_t mv_component_q4_with_scaling(int mv_q4, int scale_fp, - int offset_q4) { - int32_t scaled_mv; - // returns the scaled and offset value of the mv component. - scaled_mv = (mv_q4 * scale_fp >> VP9_REF_SCALE_SHIFT) + offset_q4; - - return scaled_mv; +static MV32 mv_q4_with_scaling(const MV *mv, + const struct scale_factors *scale) { + const MV32 res = { + (mv->row * scale->y_scale_fp >> VP9_REF_SCALE_SHIFT) + scale->y_offset_q4, + (mv->col * scale->x_scale_fp >> VP9_REF_SCALE_SHIFT) + scale->x_offset_q4 + }; + return res; } -static int32_t mv_component_q4_without_scaling(int mv_q4, int scale_fp, - int offset_q4) { - // returns the scaled and offset value of the mv component. - (void)scale_fp; - (void)offset_q4; - return mv_q4; +static MV32 mv_q4_without_scaling(const MV *mv, + const struct scale_factors *scale) { + const MV32 res = { + mv->row, + mv->col + }; + return res; } static void set_offsets_with_scaling(struct scale_factors *scale, @@ -112,13 +109,13 @@ void vp9_setup_scale_factors_for_frame(struct scale_factors *scale, scale->scale_value_y = unscaled_value; scale->set_scaled_offsets = set_offsets_without_scaling; scale->scale_mv_q3_to_q4 = mv_q3_to_q4_without_scaling; - scale->scale_mv_component_q4 = mv_component_q4_without_scaling; + scale->scale_mv_q4 = mv_q4_without_scaling; } else { scale->scale_value_x = scale_value_x_with_scaling; scale->scale_value_y = scale_value_y_with_scaling; scale->set_scaled_offsets = set_offsets_with_scaling; scale->scale_mv_q3_to_q4 = mv_q3_to_q4_with_scaling; - scale->scale_mv_component_q4 = mv_component_q4_with_scaling; + scale->scale_mv_q4 = mv_q4_with_scaling; } // TODO(agrange): Investigate the best choice of functions to use here @@ -288,35 +285,18 @@ void vp9_copy_mem8x4_c(const uint8_t *src, void vp9_build_inter_predictor(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, - const int_mv *mv_q3, + const int_mv *src_mv, const struct scale_factors *scale, int w, int h, int weight, - const struct subpix_fn_table *subpix) { - int_mv32 mv = scale->scale_mv_q3_to_q4(mv_q3, scale); - src += (mv.as_mv.row >> 4) * src_stride + (mv.as_mv.col >> 4); - scale->predict[!!(mv.as_mv.col & 15)][!!(mv.as_mv.row & 15)][weight]( - src, src_stride, dst, dst_stride, - subpix->filter_x[mv.as_mv.col & 15], scale->x_step_q4, - subpix->filter_y[mv.as_mv.row & 15], scale->y_step_q4, - w, h); -} + const struct subpix_fn_table *subpix, + enum mv_precision precision) { + const MV32 mv = precision == MV_PRECISION_Q4 + ? scale->scale_mv_q4(&src_mv->as_mv, scale) + : scale->scale_mv_q3_to_q4(&src_mv->as_mv, scale); + const int subpel_x = mv.col & 15; + const int subpel_y = mv.row & 15; -void vp9_build_inter_predictor_q4(const uint8_t *src, int src_stride, - uint8_t *dst, int dst_stride, - const int_mv *mv_q4, - const struct scale_factors *scale, - int w, int h, int weight, - const struct subpix_fn_table *subpix) { - const int scaled_mv_row_q4 = scale->scale_mv_component_q4(mv_q4->as_mv.row, - scale->y_scale_fp, - scale->y_offset_q4); - const int scaled_mv_col_q4 = scale->scale_mv_component_q4(mv_q4->as_mv.col, - scale->x_scale_fp, - scale->x_offset_q4); - const int subpel_x = scaled_mv_col_q4 & 15; - const int subpel_y = scaled_mv_row_q4 & 15; - - src += (scaled_mv_row_q4 >> 4) * src_stride + (scaled_mv_col_q4 >> 4); + src += (mv.row >> 4) * src_stride + (mv.col >> 4); scale->predict[!!subpel_x][!!subpel_y][weight]( src, src_stride, dst, dst_stride, subpix->filter_x[subpel_x], scale->x_step_q4, @@ -446,11 +426,11 @@ static void build_inter_predictors(int plane, int block, xd->mb_to_bottom_edge); scale->set_scaled_offsets(scale, arg->y + y, arg->x + x); - vp9_build_inter_predictor_q4(pre, pre_stride, - dst, arg->dst_stride[plane], - &clamped_mv, &xd->scale_factor[which_mv], - 4 << pred_w, 4 << pred_h, which_mv, - &xd->subpix); + vp9_build_inter_predictor(pre, pre_stride, + dst, arg->dst_stride[plane], + &clamped_mv, &xd->scale_factor[which_mv], + 4 << pred_w, 4 << pred_h, which_mv, + &xd->subpix, MV_PRECISION_Q4); } } void vp9_build_inter_predictors_sby(MACROBLOCKD *xd, diff --git a/vp9/common/vp9_reconinter.h b/vp9/common/vp9_reconinter.h index 4e521850d..82dda599a 100644 --- a/vp9/common/vp9_reconinter.h +++ b/vp9/common/vp9_reconinter.h @@ -42,14 +42,8 @@ void vp9_build_inter_predictor(const uint8_t *src, int src_stride, const int_mv *mv_q3, const struct scale_factors *scale, int w, int h, int do_avg, - const struct subpix_fn_table *subpix); - -void vp9_build_inter_predictor_q4(const uint8_t *src, int src_stride, - uint8_t *dst, int dst_stride, - const int_mv *mv_q4, - const struct scale_factors *scale, - int w, int h, int do_avg, - const struct subpix_fn_table *subpix); + const struct subpix_fn_table *subpix, + enum mv_precision precision); static int scaled_buffer_offset(int x_offset, int y_offset, int stride, const struct scale_factors *scale) { diff --git a/vp9/encoder/vp9_rdopt.c b/vp9/encoder/vp9_rdopt.c index a48e7dbb3..8d9e41541 100644 --- a/vp9/encoder/vp9_rdopt.c +++ b/vp9/encoder/vp9_rdopt.c @@ -1129,7 +1129,8 @@ static int64_t encode_inter_mb_segment(VP9_COMMON *const cm, xd->plane[0].dst.stride, &xd->mode_info_context->bmi[i].as_mv[0], &xd->scale_factor[0], - 4 * bw, 4 * bh, 0 /* no avg */, &xd->subpix); + 4 * bw, 4 * bh, 0 /* no avg */, &xd->subpix, + MV_PRECISION_Q3); // TODO(debargha): Make this work properly with the // implicit-compoundinter-weight experiment when implicit @@ -1143,7 +1144,7 @@ static int64_t encode_inter_mb_segment(VP9_COMMON *const cm, dst, xd->plane[0].dst.stride, &xd->mode_info_context->bmi[i].as_mv[1], &xd->scale_factor[1], 4 * bw, 4 * bh, 1, - &xd->subpix); + &xd->subpix, MV_PRECISION_Q3); } vp9_subtract_block(4 * bh, 4 * bw, src_diff, 8, @@ -1966,6 +1967,7 @@ static void model_rd_for_sb(VP9_COMP *cpi, BLOCK_SIZE_TYPE bsize, int64_t dist; cpi->fn_ptr[bs].vf(p->src.buf, p->src.stride, pd->dst.buf, pd->dst.stride, &sse); + model_rd_from_var_lapndz(sse, bw * bh, pd->dequant[1] >> 3, &rate, &dist); rate_sum += rate; @@ -2151,7 +2153,7 @@ static void joint_motion_search(VP9_COMP *cpi, MACROBLOCK *x, &frame_mv[refs[!id]], &xd->scale_factor[!id], pw, ph, 0, - &xd->subpix); + &xd->subpix, MV_PRECISION_Q3); // Compound motion search on first ref frame. if (id) diff --git a/vp9/encoder/vp9_temporal_filter.c b/vp9/encoder/vp9_temporal_filter.c index 47792fcc2..872bf267a 100644 --- a/vp9/encoder/vp9_temporal_filter.c +++ b/vp9/encoder/vp9_temporal_filter.c @@ -51,25 +51,25 @@ static void temporal_filter_predictors_mb_c(MACROBLOCKD *xd, &xd->scale_factor[which_mv], 16, 16, which_mv, - &xd->subpix); + &xd->subpix, MV_PRECISION_Q3); stride = (stride + 1) >> 1; - vp9_build_inter_predictor_q4(u_mb_ptr, stride, - &pred[256], 8, - &mv, - &xd->scale_factor_uv[which_mv], - 8, 8, - which_mv, - &xd->subpix); + vp9_build_inter_predictor(u_mb_ptr, stride, + &pred[256], 8, + &mv, + &xd->scale_factor_uv[which_mv], + 8, 8, + which_mv, + &xd->subpix, MV_PRECISION_Q4); - vp9_build_inter_predictor_q4(v_mb_ptr, stride, - &pred[320], 8, - &mv, - &xd->scale_factor_uv[which_mv], - 8, 8, - which_mv, - &xd->subpix); + vp9_build_inter_predictor(v_mb_ptr, stride, + &pred[320], 8, + &mv, + &xd->scale_factor_uv[which_mv], + 8, 8, + which_mv, + &xd->subpix, MV_PRECISION_Q4); } void vp9_temporal_filter_apply_c(uint8_t *frame1,