vpx/vp9/common/vp9_findnearmv.h

187 lines
5.7 KiB
C
Raw Normal View History

2010-05-18 11:58:33 -04:00
/*
* Copyright (c) 2010 The WebM project authors. All Rights Reserved.
2010-05-18 11:58:33 -04:00
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
2010-05-18 11:58:33 -04:00
*/
#ifndef VP9_COMMON_VP9_FINDNEARMV_H_
#define VP9_COMMON_VP9_FINDNEARMV_H_
2010-05-18 11:58:33 -04:00
#include "vp9/common/vp9_mv.h"
#include "vp9/common/vp9_blockd.h"
#include "vp9/common/vp9_treecoder.h"
#include "vp9/common/vp9_onyxc_int.h"
2010-05-18 11:58:33 -04:00
#define LEFT_TOP_MARGIN ((VP9BORDERINPIXELS - VP9_INTERP_EXTEND) << 3)
#define RIGHT_BOTTOM_MARGIN ((VP9BORDERINPIXELS - VP9_INTERP_EXTEND) << 3)
// check a list of motion vectors by sad score using a number rows of pixels
// above and a number cols of pixels in the left to select the one with best
// score to use as ref motion vector
void vp9_find_best_ref_mvs(MACROBLOCKD *xd,
int_mv *mvlist,
int_mv *nearest,
int_mv *near);
static void mv_bias(int refmb_ref_frame_sign_bias, int refframe,
int_mv *mvp, const int *ref_frame_sign_bias) {
MV xmv = mvp->as_mv;
if (refmb_ref_frame_sign_bias != ref_frame_sign_bias[refframe]) {
xmv.row *= -1;
xmv.col *= -1;
}
mvp->as_mv = xmv;
}
// TODO(jingning): this mv clamping function should be block size dependent.
static void clamp_mv(int_mv *mv,
int mb_to_left_edge,
int mb_to_right_edge,
int mb_to_top_edge,
int mb_to_bottom_edge) {
mv->as_mv.col = clamp(mv->as_mv.col, mb_to_left_edge, mb_to_right_edge);
mv->as_mv.row = clamp(mv->as_mv.row, mb_to_top_edge, mb_to_bottom_edge);
}
static int clamp_mv2(int_mv *mv, const MACROBLOCKD *xd) {
int_mv tmp_mv;
tmp_mv.as_int = mv->as_int;
clamp_mv(mv,
xd->mb_to_left_edge - LEFT_TOP_MARGIN,
xd->mb_to_right_edge + RIGHT_BOTTOM_MARGIN,
xd->mb_to_top_edge - LEFT_TOP_MARGIN,
xd->mb_to_bottom_edge + RIGHT_BOTTOM_MARGIN);
return tmp_mv.as_int != mv->as_int;
}
static int check_mv_bounds(int_mv *mv,
int mb_to_left_edge, int mb_to_right_edge,
int mb_to_top_edge, int mb_to_bottom_edge) {
return mv->as_mv.col < mb_to_left_edge ||
mv->as_mv.col > mb_to_right_edge ||
mv->as_mv.row < mb_to_top_edge ||
mv->as_mv.row > mb_to_bottom_edge;
}
vp9_prob *vp9_mv_ref_probs(VP9_COMMON *pc,
vp9_prob p[VP9_MVREFS - 1],
const int context);
2010-05-18 11:58:33 -04:00
extern const uint8_t vp9_mbsplit_offset[4][16];
2010-05-18 11:58:33 -04:00
[WIP] Add column-based tiling. This patch adds column-based tiling. The idea is to make each tile independently decodable (after reading the common frame header) and also independendly encodable (minus within-frame cost adjustments in the RD loop) to speed-up hardware & software en/decoders if they used multi-threading. Column-based tiling has the added advantage (over other tiling methods) that it minimizes realtime use-case latency, since all threads can start encoding data as soon as the first SB-row worth of data is available to the encoder. There is some test code that does random tile ordering in the decoder, to confirm that each tile is indeed independently decodable from other tiles in the same frame. At tile edges, all contexts assume default values (i.e. 0, 0 motion vector, no coefficients, DC intra4x4 mode), and motion vector search and ordering do not cross tiles in the same frame. t log Tile independence is not maintained between frames ATM, i.e. tile 0 of frame 1 is free to use motion vectors that point into any tile of frame 0. We support 1 (i.e. no tiling), 2 or 4 column-tiles. The loopfilter crosses tile boundaries. I discussed this briefly with Aki and he says that's OK. An in-loop loopfilter would need to do some sync between tile threads, but that shouldn't be a big issue. Resuls: with tiling disabled, we go up slightly because of improved edge use in the intra4x4 prediction. With 2 tiles, we lose about ~1% on derf, ~0.35% on HD and ~0.55% on STD/HD. With 4 tiles, we lose another ~1.5% on derf ~0.77% on HD and ~0.85% on STD/HD. Most of this loss is concentrated in the low-bitrate end of clips, and most of it is because of the loss of edges at tile boundaries and the resulting loss of intra predictors. TODO: - more tiles (perhaps allow row-based tiling also, and max. 8 tiles)? - maybe optionally (for EC purposes), motion vectors themselves should not cross tile edges, or we should emulate such borders as if they were off-frame, to limit error propagation to within one tile only. This doesn't have to be the default behaviour but could be an optional bitstream flag. Change-Id: I5951c3a0742a767b20bc9fb5af685d9892c2c96f
2013-02-01 09:35:28 -08:00
static int left_block_mv(const MACROBLOCKD *xd,
const MODE_INFO *cur_mb, int b) {
if (!(b & 3)) {
[WIP] Add column-based tiling. This patch adds column-based tiling. The idea is to make each tile independently decodable (after reading the common frame header) and also independendly encodable (minus within-frame cost adjustments in the RD loop) to speed-up hardware & software en/decoders if they used multi-threading. Column-based tiling has the added advantage (over other tiling methods) that it minimizes realtime use-case latency, since all threads can start encoding data as soon as the first SB-row worth of data is available to the encoder. There is some test code that does random tile ordering in the decoder, to confirm that each tile is indeed independently decodable from other tiles in the same frame. At tile edges, all contexts assume default values (i.e. 0, 0 motion vector, no coefficients, DC intra4x4 mode), and motion vector search and ordering do not cross tiles in the same frame. t log Tile independence is not maintained between frames ATM, i.e. tile 0 of frame 1 is free to use motion vectors that point into any tile of frame 0. We support 1 (i.e. no tiling), 2 or 4 column-tiles. The loopfilter crosses tile boundaries. I discussed this briefly with Aki and he says that's OK. An in-loop loopfilter would need to do some sync between tile threads, but that shouldn't be a big issue. Resuls: with tiling disabled, we go up slightly because of improved edge use in the intra4x4 prediction. With 2 tiles, we lose about ~1% on derf, ~0.35% on HD and ~0.55% on STD/HD. With 4 tiles, we lose another ~1.5% on derf ~0.77% on HD and ~0.85% on STD/HD. Most of this loss is concentrated in the low-bitrate end of clips, and most of it is because of the loss of edges at tile boundaries and the resulting loss of intra predictors. TODO: - more tiles (perhaps allow row-based tiling also, and max. 8 tiles)? - maybe optionally (for EC purposes), motion vectors themselves should not cross tile edges, or we should emulate such borders as if they were off-frame, to limit error propagation to within one tile only. This doesn't have to be the default behaviour but could be an optional bitstream flag. Change-Id: I5951c3a0742a767b20bc9fb5af685d9892c2c96f
2013-02-01 09:35:28 -08:00
if (!xd->left_available)
return 0;
// On L edge, get from MB to left of us
--cur_mb;
if (cur_mb->mbmi.mode != SPLITMV)
return cur_mb->mbmi.mv[0].as_int;
b += 4;
}
return (cur_mb->bmi + b - 1)->as_mv[0].as_int;
}
[WIP] Add column-based tiling. This patch adds column-based tiling. The idea is to make each tile independently decodable (after reading the common frame header) and also independendly encodable (minus within-frame cost adjustments in the RD loop) to speed-up hardware & software en/decoders if they used multi-threading. Column-based tiling has the added advantage (over other tiling methods) that it minimizes realtime use-case latency, since all threads can start encoding data as soon as the first SB-row worth of data is available to the encoder. There is some test code that does random tile ordering in the decoder, to confirm that each tile is indeed independently decodable from other tiles in the same frame. At tile edges, all contexts assume default values (i.e. 0, 0 motion vector, no coefficients, DC intra4x4 mode), and motion vector search and ordering do not cross tiles in the same frame. t log Tile independence is not maintained between frames ATM, i.e. tile 0 of frame 1 is free to use motion vectors that point into any tile of frame 0. We support 1 (i.e. no tiling), 2 or 4 column-tiles. The loopfilter crosses tile boundaries. I discussed this briefly with Aki and he says that's OK. An in-loop loopfilter would need to do some sync between tile threads, but that shouldn't be a big issue. Resuls: with tiling disabled, we go up slightly because of improved edge use in the intra4x4 prediction. With 2 tiles, we lose about ~1% on derf, ~0.35% on HD and ~0.55% on STD/HD. With 4 tiles, we lose another ~1.5% on derf ~0.77% on HD and ~0.85% on STD/HD. Most of this loss is concentrated in the low-bitrate end of clips, and most of it is because of the loss of edges at tile boundaries and the resulting loss of intra predictors. TODO: - more tiles (perhaps allow row-based tiling also, and max. 8 tiles)? - maybe optionally (for EC purposes), motion vectors themselves should not cross tile edges, or we should emulate such borders as if they were off-frame, to limit error propagation to within one tile only. This doesn't have to be the default behaviour but could be an optional bitstream flag. Change-Id: I5951c3a0742a767b20bc9fb5af685d9892c2c96f
2013-02-01 09:35:28 -08:00
static int left_block_second_mv(const MACROBLOCKD *xd,
const MODE_INFO *cur_mb, int b) {
if (!(b & 3)) {
[WIP] Add column-based tiling. This patch adds column-based tiling. The idea is to make each tile independently decodable (after reading the common frame header) and also independendly encodable (minus within-frame cost adjustments in the RD loop) to speed-up hardware & software en/decoders if they used multi-threading. Column-based tiling has the added advantage (over other tiling methods) that it minimizes realtime use-case latency, since all threads can start encoding data as soon as the first SB-row worth of data is available to the encoder. There is some test code that does random tile ordering in the decoder, to confirm that each tile is indeed independently decodable from other tiles in the same frame. At tile edges, all contexts assume default values (i.e. 0, 0 motion vector, no coefficients, DC intra4x4 mode), and motion vector search and ordering do not cross tiles in the same frame. t log Tile independence is not maintained between frames ATM, i.e. tile 0 of frame 1 is free to use motion vectors that point into any tile of frame 0. We support 1 (i.e. no tiling), 2 or 4 column-tiles. The loopfilter crosses tile boundaries. I discussed this briefly with Aki and he says that's OK. An in-loop loopfilter would need to do some sync between tile threads, but that shouldn't be a big issue. Resuls: with tiling disabled, we go up slightly because of improved edge use in the intra4x4 prediction. With 2 tiles, we lose about ~1% on derf, ~0.35% on HD and ~0.55% on STD/HD. With 4 tiles, we lose another ~1.5% on derf ~0.77% on HD and ~0.85% on STD/HD. Most of this loss is concentrated in the low-bitrate end of clips, and most of it is because of the loss of edges at tile boundaries and the resulting loss of intra predictors. TODO: - more tiles (perhaps allow row-based tiling also, and max. 8 tiles)? - maybe optionally (for EC purposes), motion vectors themselves should not cross tile edges, or we should emulate such borders as if they were off-frame, to limit error propagation to within one tile only. This doesn't have to be the default behaviour but could be an optional bitstream flag. Change-Id: I5951c3a0742a767b20bc9fb5af685d9892c2c96f
2013-02-01 09:35:28 -08:00
if (!xd->left_available)
return 0;
/* On L edge, get from MB to left of us */
--cur_mb;
if (cur_mb->mbmi.mode != SPLITMV)
return cur_mb->mbmi.second_ref_frame > 0 ?
cur_mb->mbmi.mv[1].as_int : cur_mb->mbmi.mv[0].as_int;
b += 4;
}
return cur_mb->mbmi.second_ref_frame > 0 ?
(cur_mb->bmi + b - 1)->as_mv[1].as_int :
(cur_mb->bmi + b - 1)->as_mv[0].as_int;
}
static int above_block_mv(const MODE_INFO *cur_mb, int b, int mi_stride) {
if (!(b >> 2)) {
/* On top edge, get from MB above us */
cur_mb -= mi_stride;
if (cur_mb->mbmi.mode != SPLITMV)
return cur_mb->mbmi.mv[0].as_int;
b += 16;
}
return (cur_mb->bmi + b - 4)->as_mv[0].as_int;
}
static int above_block_second_mv(const MODE_INFO *cur_mb, int b, int mi_stride) {
if (!(b >> 2)) {
/* On top edge, get from MB above us */
cur_mb -= mi_stride;
if (cur_mb->mbmi.mode != SPLITMV)
return cur_mb->mbmi.second_ref_frame > 0 ?
cur_mb->mbmi.mv[1].as_int : cur_mb->mbmi.mv[0].as_int;
b += 16;
}
return cur_mb->mbmi.second_ref_frame > 0 ?
(cur_mb->bmi + b - 4)->as_mv[1].as_int :
(cur_mb->bmi + b - 4)->as_mv[0].as_int;
}
static B_PREDICTION_MODE left_block_mode(const MODE_INFO *cur_mb, int b) {
if (!(b & 3)) {
/* On L edge, get from MB to left of us */
--cur_mb;
if (cur_mb->mbmi.mode < I8X8_PRED) {
return pred_mode_conv(cur_mb->mbmi.mode);
} else if (cur_mb->mbmi.mode == I8X8_PRED) {
return pred_mode_conv(
(MB_PREDICTION_MODE)(cur_mb->bmi + 3 + b)->as_mode.first);
} else if (cur_mb->mbmi.mode == I4X4_PRED) {
return ((cur_mb->bmi + 3 + b)->as_mode.first);
} else {
return B_DC_PRED;
}
}
return (cur_mb->bmi + b - 1)->as_mode.first;
}
static B_PREDICTION_MODE above_block_mode(const MODE_INFO *cur_mb,
int b, int mi_stride) {
if (!(b >> 2)) {
/* On top edge, get from MB above us */
cur_mb -= mi_stride;
if (cur_mb->mbmi.mode < I8X8_PRED) {
return pred_mode_conv(cur_mb->mbmi.mode);
} else if (cur_mb->mbmi.mode == I8X8_PRED) {
return pred_mode_conv(
(MB_PREDICTION_MODE)(cur_mb->bmi + 12 + b)->as_mode.first);
} else if (cur_mb->mbmi.mode == I4X4_PRED) {
return ((cur_mb->bmi + 12 + b)->as_mode.first);
} else {
return B_DC_PRED;
}
}
return (cur_mb->bmi + b - 4)->as_mode.first;
}
#endif // VP9_COMMON_VP9_FINDNEARMV_H_