2010-05-18 17:58:33 +02:00
|
|
|
/*
|
2010-09-09 14:16:39 +02:00
|
|
|
* Copyright (c) 2010 The WebM project authors. All Rights Reserved.
|
2010-05-18 17:58:33 +02:00
|
|
|
*
|
2010-06-18 18:39:21 +02:00
|
|
|
* Use of this source code is governed by a BSD-style license
|
2010-06-04 22:19:40 +02:00
|
|
|
* that can be found in the LICENSE file in the root of the source
|
|
|
|
* tree. An additional intellectual property rights grant can be found
|
2010-06-18 18:39:21 +02:00
|
|
|
* in the file PATENTS. All contributing project authors may
|
2010-06-04 22:19:40 +02:00
|
|
|
* be found in the AUTHORS file in the root of the source tree.
|
2010-05-18 17:58:33 +02:00
|
|
|
*/
|
|
|
|
|
|
|
|
|
2012-11-30 01:36:10 +01:00
|
|
|
#ifndef VP9_COMMON_VP9_BLOCKD_H_
|
|
|
|
#define VP9_COMMON_VP9_BLOCKD_H_
|
2010-05-18 17:58:33 +02:00
|
|
|
|
2012-12-23 16:20:10 +01:00
|
|
|
#include "./vpx_config.h"
|
2013-07-10 21:29:43 +02:00
|
|
|
|
|
|
|
#include "vpx_ports/mem.h"
|
2010-05-18 17:58:33 +02:00
|
|
|
#include "vpx_scale/yv12config.h"
|
2013-07-10 21:29:43 +02:00
|
|
|
|
|
|
|
#include "vp9/common/vp9_common.h"
|
|
|
|
#include "vp9/common/vp9_common_data.h"
|
Convert subpixel filters to use convolve framework
Update the code to call the new convolution functions to do subpixel
prediction rather than the existing functions. Remove the old C and
assembly code, since it is unused. This causes a 50% performance
reduction on the decoder, but that will be resolved when the asm for
the new functions is available.
There is no consensus for whether 6-tap or 2-tap predictors will be
supported in the final codec, so these filters are implemented in
terms of the 8-tap code, so that quality testing of these modes
can continue. Implementing the lower complexity algorithms is a
simple exercise, should it be necessary.
This code produces slightly better results in the EIGHTTAP_SMOOTH
case, since the filter is now applied in only one direction when
the subpel motion is only in one direction. Like the previous code,
the filtering is skipped entirely on full-pel MVs. This combination
seems to give the best quality gains, but this may be indicative of a
bug in the encoder's filter selection, since the encoder could
achieve the result of skipping the filtering on full-pel by selecting
one of the other filters. This should be revisited.
Quality gains on derf positive on almost all clips. The only clip
that seemed to be hurt at all datarates was football
(-0.115% PSNR average, -0.587% min). Overall averages 0.375% PSNR,
0.347% SSIM.
Change-Id: I7d469716091b1d89b4b08adde5863999319d69ff
2013-01-29 01:59:03 +01:00
|
|
|
#include "vp9/common/vp9_convolve.h"
|
2013-07-10 21:29:43 +02:00
|
|
|
#include "vp9/common/vp9_enums.h"
|
2012-11-28 19:41:40 +01:00
|
|
|
#include "vp9/common/vp9_mv.h"
|
2013-07-10 21:29:43 +02:00
|
|
|
#include "vp9/common/vp9_seg_common.h"
|
2012-11-28 19:41:40 +01:00
|
|
|
#include "vp9/common/vp9_treecoder.h"
|
2010-05-18 17:58:33 +02:00
|
|
|
|
2013-06-03 19:39:40 +02:00
|
|
|
#define BLOCK_SIZE_GROUPS 4
|
2012-03-19 19:02:04 +01:00
|
|
|
#define MBSKIP_CONTEXTS 3
|
|
|
|
|
2010-10-28 01:04:02 +02:00
|
|
|
/* Segment Feature Masks */
|
2013-04-19 16:40:36 +02:00
|
|
|
#define MAX_MV_REF_CANDIDATES 2
|
2010-05-18 17:58:33 +02:00
|
|
|
|
2013-06-06 22:44:34 +02:00
|
|
|
#define INTRA_INTER_CONTEXTS 4
|
|
|
|
#define COMP_INTER_CONTEXTS 5
|
|
|
|
#define REF_CONTEXTS 5
|
|
|
|
|
2013-02-20 20:36:31 +01:00
|
|
|
/* Coefficient plane category.  setup_block_dptrs() assigns
 * PLANE_TYPE_Y_WITH_DC to plane 0 and PLANE_TYPE_UV to every other plane. */
typedef enum {
  PLANE_TYPE_Y_WITH_DC = 0,
  PLANE_TYPE_UV = 1
} PLANE_TYPE;
|
2011-02-23 22:37:08 +01:00
|
|
|
|
2010-08-31 16:49:57 +02:00
|
|
|
typedef char ENTROPY_CONTEXT;
|
2010-05-18 17:58:33 +02:00
|
|
|
|
2013-04-23 19:12:18 +02:00
|
|
|
typedef char PARTITION_CONTEXT;
|
|
|
|
|
2013-04-17 00:30:28 +02:00
|
|
|
/* Returns how many of the two given entropy contexts are non-zero
 * (0, 1 or 2). */
static INLINE int combine_entropy_contexts(ENTROPY_CONTEXT a,
                                           ENTROPY_CONTEXT b) {
  const int a_is_set = (a != 0);
  const int b_is_set = (b != 0);

  return a_is_set + b_is_set;
}
|
2010-05-18 17:58:33 +02:00
|
|
|
|
2012-07-14 00:21:29 +02:00
|
|
|
/* Frame coding type.  NUM_FRAME_TYPES is the number of real frame types and
 * must stay last. */
typedef enum {
  KEY_FRAME = 0,
  INTER_FRAME = 1,
  NUM_FRAME_TYPES = 2
} FRAME_TYPE;
|
|
|
|
|
2013-02-27 20:17:38 +01:00
|
|
|
/* Interpolation filter selection.  SWITCHABLE is not a filter by itself and
 * must remain the last enumerator. */
typedef enum {
  EIGHTTAP_SMOOTH = 0,
  EIGHTTAP = 1,
  EIGHTTAP_SHARP = 2,
  BILINEAR = 3,
  SWITCHABLE = 4  /* should be the last one */
} INTERPOLATIONFILTERTYPE;
|
|
|
|
|
2013-02-27 20:17:38 +01:00
|
|
|
/* Block prediction modes.  The intra modes (DC_PRED..TM_PRED) come first and
 * the inter modes (NEARESTMV..NEWMV) follow; is_intra_mode(),
 * is_inter_mode(), VP9_INTRA_MODES and VP9_INTER_MODES depend on this
 * ordering. */
typedef enum {
  /* Intra modes. */
  DC_PRED,    // Average of above and left pixels
  V_PRED,     // Vertical
  H_PRED,     // Horizontal
  D45_PRED,   // Directional 45 deg = round(arctan(1/1) * 180/pi)
  D135_PRED,  // Directional 135 deg = 180 - 45
  D117_PRED,  // Directional 117 deg = 180 - 63
  D153_PRED,  // Directional 153 deg = 180 - 27
  D27_PRED,   // Directional 27 deg = round(arctan(1/2) * 180/pi)
  D63_PRED,   // Directional 63 deg = round(arctan(2/1) * 180/pi)
  TM_PRED,    // True-motion

  /* Inter modes. */
  NEARESTMV,
  NEARMV,
  ZEROMV,
  NEWMV,

  /* Total number of modes; keep last. */
  MB_MODE_COUNT
} MB_PREDICTION_MODE;
|
|
|
|
|
2013-07-15 21:26:58 +02:00
|
|
|
/* Returns 1 when |mode| is not greater than TM_PRED (the last intra mode in
 * MB_PREDICTION_MODE), 0 otherwise. */
static INLINE int is_intra_mode(MB_PREDICTION_MODE mode) {
  if (mode > TM_PRED)
    return 0;

  return 1;
}
|
|
|
|
|
2013-04-24 21:14:58 +02:00
|
|
|
/* Returns 1 when |mode| lies in the inclusive range NEARESTMV..NEWMV,
 * 0 otherwise. */
static INLINE int is_inter_mode(MB_PREDICTION_MODE mode) {
  if (mode < NEARESTMV)
    return 0;

  return mode <= NEWMV;
}
|
|
|
|
|
2013-05-31 01:21:48 +02:00
|
|
|
/* Number of intra prediction modes: DC_PRED through TM_PRED inclusive. */
#define VP9_INTRA_MODES (TM_PRED + 1)
|
2010-05-18 17:58:33 +02:00
|
|
|
|
2013-06-05 20:21:44 +02:00
|
|
|
/* Number of inter prediction modes: NEARESTMV through NEWMV inclusive. */
#define VP9_INTER_MODES (1 + NEWMV - NEARESTMV)
|
2010-05-18 17:58:33 +02:00
|
|
|
|
2013-07-18 02:07:32 +02:00
|
|
|
/* Returns the position of |mode| relative to NEARESTMV, i.e. 0 for
 * NEARESTMV and VP9_INTER_MODES - 1 for NEWMV.  No range check is done. */
static INLINE int inter_mode_offset(MB_PREDICTION_MODE mode) {
  const int offset = mode - NEARESTMV;

  return offset;
}
|
|
|
|
|
2010-05-18 17:58:33 +02:00
|
|
|
/* For keyframes, intra block modes are predicted by the (already decoded)
|
|
|
|
modes for the Y blocks to the left and above us; for interframes, there
|
|
|
|
is a single probability table. */
|
|
|
|
|
2012-07-14 00:21:29 +02:00
|
|
|
union b_mode_info {
|
2013-07-03 01:51:57 +02:00
|
|
|
MB_PREDICTION_MODE as_mode;
|
2013-02-09 04:46:36 +01:00
|
|
|
int_mv as_mv[2]; // first, second inter predictor motion vectors
|
2011-05-26 21:13:00 +02:00
|
|
|
};
|
2010-05-18 17:58:33 +02:00
|
|
|
|
2012-07-14 00:21:29 +02:00
|
|
|
/* Reference frame identifiers.  NONE marks "no reference"; MAX_REF_FRAMES is
 * the number of real entries (INTRA_FRAME..ALTREF_FRAME) and is used to size
 * per-reference arrays. */
typedef enum {
  NONE           = -1,
  INTRA_FRAME    = 0,
  LAST_FRAME     = 1,
  GOLDEN_FRAME   = 2,
  ALTREF_FRAME   = 3,
  MAX_REF_FRAMES = 4
} MV_REFERENCE_FRAME;
|
|
|
|
|
2013-04-29 21:43:38 +02:00
|
|
|
/* Looks up the log2 block width for |sb_type| in b_width_log2_lookup.
 * plane_block_width() turns this into a width via 4 << value, so the
 * value is in 4-sample units. */
static INLINE int b_width_log2(BLOCK_SIZE_TYPE sb_type) {
  const int width_log2 = b_width_log2_lookup[sb_type];

  return width_log2;
}
|
2013-04-29 21:43:38 +02:00
|
|
|
/* Looks up the log2 block height for |sb_type| in b_height_log2_lookup.
 * plane_block_height() turns this into a height via 4 << value, so the
 * value is in 4-sample units. */
static INLINE int b_height_log2(BLOCK_SIZE_TYPE sb_type) {
  const int height_log2 = b_height_log2_lookup[sb_type];

  return height_log2;
}
|
2013-01-06 03:20:25 +01:00
|
|
|
|
2013-04-29 21:43:38 +02:00
|
|
|
/* Looks up the log2 width of |sb_type| in mi_width_log2_lookup.
 * NOTE(review): presumably in mode-info units -- confirm against the
 * lookup table definition. */
static INLINE int mi_width_log2(BLOCK_SIZE_TYPE sb_type) {
  const int width_log2 = mi_width_log2_lookup[sb_type];

  return width_log2;
}
|
|
|
|
|
2013-04-29 21:43:38 +02:00
|
|
|
/* Looks up the log2 height of |sb_type| in mi_height_log2_lookup.
 * NOTE(review): presumably in mode-info units -- confirm against the
 * lookup table definition. */
static INLINE int mi_height_log2(BLOCK_SIZE_TYPE sb_type) {
  const int height_log2 = mi_height_log2_lookup[sb_type];

  return height_log2;
}
|
|
|
|
|
2012-07-14 00:21:29 +02:00
|
|
|
typedef struct {
|
|
|
|
MB_PREDICTION_MODE mode, uv_mode;
|
2013-06-06 22:44:34 +02:00
|
|
|
MV_REFERENCE_FRAME ref_frame[2];
|
2012-07-14 00:21:29 +02:00
|
|
|
TX_SIZE txfm_size;
|
2012-08-10 01:07:41 +02:00
|
|
|
int_mv mv[2]; // for each reference frame used
|
2012-12-04 18:21:05 +01:00
|
|
|
int_mv ref_mvs[MAX_REF_FRAMES][MAX_MV_REF_CANDIDATES];
|
2012-11-09 19:52:08 +01:00
|
|
|
int_mv best_mv, best_second_mv;
|
2012-08-24 16:44:01 +02:00
|
|
|
|
2013-06-24 23:11:16 +02:00
|
|
|
uint8_t mb_mode_context[MAX_REF_FRAMES];
|
2012-11-12 16:09:25 +01:00
|
|
|
|
2012-07-14 00:21:29 +02:00
|
|
|
unsigned char mb_skip_coeff; /* does this mb has coefficients at all, 1=no coefficients, 0=need decode tokens */
|
2013-04-23 15:01:55 +02:00
|
|
|
unsigned char segment_id; // Segment id for current frame
|
2012-07-14 00:21:29 +02:00
|
|
|
|
|
|
|
// Flags used for prediction status of various bistream signals
|
|
|
|
unsigned char seg_id_predicted;
|
|
|
|
|
|
|
|
// Indicates if the mb is part of the image (1) vs border (0)
|
|
|
|
// This can be useful in determining whether the MB provides
|
|
|
|
// a valid predictor
|
|
|
|
unsigned char mb_in_image;
|
2012-01-28 11:07:08 +01:00
|
|
|
|
2013-01-08 19:29:22 +01:00
|
|
|
INTERPOLATIONFILTERTYPE interp_filter;
|
2012-06-26 01:23:58 +02:00
|
|
|
|
2013-01-06 03:20:25 +01:00
|
|
|
BLOCK_SIZE_TYPE sb_type;
|
2010-05-18 17:58:33 +02:00
|
|
|
} MB_MODE_INFO;
|
|
|
|
|
2012-07-14 00:21:29 +02:00
|
|
|
typedef struct {
|
|
|
|
MB_MODE_INFO mbmi;
|
2013-05-27 16:55:25 +02:00
|
|
|
union b_mode_info bmi[4];
|
2010-05-18 17:58:33 +02:00
|
|
|
} MODE_INFO;
|
|
|
|
|
2013-06-22 00:34:29 +02:00
|
|
|
/* Fixed-point precision tag for motion vectors.
 * NOTE(review): Q3/Q4 presumably mean 3 and 4 fractional bits -- confirm
 * against the scale_mv_q3_to_q4 / scale_mv_q4 implementations. */
enum mv_precision {
  MV_PRECISION_Q3 = 0,
  MV_PRECISION_Q4 = 1
};
|
|
|
|
|
2013-06-10 09:23:04 +02:00
|
|
|
#define VP9_REF_SCALE_SHIFT 14
|
2013-07-19 00:12:46 +02:00
|
|
|
/* The value 1 << VP9_REF_SCALE_SHIFT.
 * NOTE(review): presumably the x_scale_fp / y_scale_fp value that means
 * "reference is not scaled" -- confirm against the scaling code. */
#define VP9_REF_NO_SCALE (1 << VP9_REF_SCALE_SHIFT)
|
2013-07-15 23:59:59 +02:00
|
|
|
|
2013-02-09 02:49:44 +01:00
|
|
|
struct scale_factors {
|
2013-06-10 09:23:04 +02:00
|
|
|
int x_scale_fp; // horizontal fixed point scale factor
|
|
|
|
int y_scale_fp; // vertical fixed point scale factor
|
2013-02-09 02:49:44 +01:00
|
|
|
int x_offset_q4;
|
Spatial resamping of ZEROMV predictors
This patch allows coding frames using references of different
resolution, in ZEROMV mode. For compound prediction, either
reference may be scaled.
To test, I use the resize_test and enable WRITE_RECON_BUFFER
in vp9_onyxd_if.c. It's also useful to apply this patch to
test/i420_video_source.h:
--- a/test/i420_video_source.h
+++ b/test/i420_video_source.h
@@ -93,6 +93,7 @@ class I420VideoSource : public VideoSource {
virtual void FillFrame() {
// Read a frame from input_file.
+ if (frame_ != 3)
if (fread(img_->img_data, raw_sz_, 1, input_file_) == 0) {
limit_ = frame_;
}
This forces the frame that the resolution changes on to be coded
with no motion, only scaling, and improves the quality of the
result.
Change-Id: I1ee75d19a437ff801192f767fd02a36bcbd1d496
2013-02-25 05:55:14 +01:00
|
|
|
int x_step_q4;
|
2013-02-09 02:49:44 +01:00
|
|
|
int y_offset_q4;
|
Spatial resamping of ZEROMV predictors
This patch allows coding frames using references of different
resolution, in ZEROMV mode. For compound prediction, either
reference may be scaled.
To test, I use the resize_test and enable WRITE_RECON_BUFFER
in vp9_onyxd_if.c. It's also useful to apply this patch to
test/i420_video_source.h:
--- a/test/i420_video_source.h
+++ b/test/i420_video_source.h
@@ -93,6 +93,7 @@ class I420VideoSource : public VideoSource {
virtual void FillFrame() {
// Read a frame from input_file.
+ if (frame_ != 3)
if (fread(img_->img_data, raw_sz_, 1, input_file_) == 0) {
limit_ = frame_;
}
This forces the frame that the resolution changes on to be coded
with no motion, only scaling, and improves the quality of the
result.
Change-Id: I1ee75d19a437ff801192f767fd02a36bcbd1d496
2013-02-25 05:55:14 +01:00
|
|
|
int y_step_q4;
|
2013-04-04 18:56:02 +02:00
|
|
|
|
|
|
|
int (*scale_value_x)(int val, const struct scale_factors *scale);
|
|
|
|
int (*scale_value_y)(int val, const struct scale_factors *scale);
|
|
|
|
void (*set_scaled_offsets)(struct scale_factors *scale, int row, int col);
|
2013-06-22 00:34:29 +02:00
|
|
|
MV32 (*scale_mv_q3_to_q4)(const MV *mv, const struct scale_factors *scale);
|
|
|
|
MV32 (*scale_mv_q4)(const MV *mv, const struct scale_factors *scale);
|
2013-04-04 18:56:02 +02:00
|
|
|
|
Spatial resamping of ZEROMV predictors
This patch allows coding frames using references of different
resolution, in ZEROMV mode. For compound prediction, either
reference may be scaled.
To test, I use the resize_test and enable WRITE_RECON_BUFFER
in vp9_onyxd_if.c. It's also useful to apply this patch to
test/i420_video_source.h:
--- a/test/i420_video_source.h
+++ b/test/i420_video_source.h
@@ -93,6 +93,7 @@ class I420VideoSource : public VideoSource {
virtual void FillFrame() {
// Read a frame from input_file.
+ if (frame_ != 3)
if (fread(img_->img_data, raw_sz_, 1, input_file_) == 0) {
limit_ = frame_;
}
This forces the frame that the resolution changes on to be coded
with no motion, only scaling, and improves the quality of the
result.
Change-Id: I1ee75d19a437ff801192f767fd02a36bcbd1d496
2013-02-25 05:55:14 +01:00
|
|
|
convolve_fn_t predict[2][2][2]; // horiz, vert, avg
|
2013-02-09 02:49:44 +01:00
|
|
|
};
|
|
|
|
|
2013-05-16 02:55:08 +02:00
|
|
|
/* Number of planes per block: four when CONFIG_ALPHA is enabled, three
 * otherwise. */
enum {
#if CONFIG_ALPHA
  MAX_MB_PLANE = 4
#else
  MAX_MB_PLANE = 3
#endif
};
|
2013-04-02 23:50:40 +02:00
|
|
|
|
2013-04-20 00:52:17 +02:00
|
|
|
/* A 2-D sample buffer described by a base pointer and a stride. */
struct buf_2d {
  uint8_t *buf;  /* base pointer of the buffer */
  int stride;    /* stride of the buffer */
};
|
|
|
|
|
2013-04-23 17:26:10 +02:00
|
|
|
struct macroblockd_plane {
|
2013-04-02 23:50:40 +02:00
|
|
|
DECLARE_ALIGNED(16, int16_t, qcoeff[64 * 64]);
|
|
|
|
DECLARE_ALIGNED(16, int16_t, dqcoeff[64 * 64]);
|
2013-04-04 21:03:27 +02:00
|
|
|
DECLARE_ALIGNED(16, uint16_t, eobs[256]);
|
2013-04-06 00:54:59 +02:00
|
|
|
PLANE_TYPE plane_type;
|
|
|
|
int subsampling_x;
|
|
|
|
int subsampling_y;
|
2013-04-20 00:52:17 +02:00
|
|
|
struct buf_2d dst;
|
|
|
|
struct buf_2d pre[2];
|
2013-04-24 23:48:17 +02:00
|
|
|
int16_t *dequant;
|
2013-04-29 19:37:25 +02:00
|
|
|
ENTROPY_CONTEXT *above_context;
|
|
|
|
ENTROPY_CONTEXT *left_context;
|
2013-04-02 23:50:40 +02:00
|
|
|
};
|
|
|
|
|
|
|
|
/* Address of the i-th group of n elements starting at x: x + i * n. */
#define BLOCK_OFFSET(x, i, n) ((x) + (i) * (n))
|
|
|
|
|
2013-07-18 03:37:45 +02:00
|
|
|
/* Sizes of the loop filter delta tables in struct loopfilter. */
#define MAX_REF_LF_DELTAS 4
#define MAX_MODE_LF_DELTAS 2

/* Loop filter parameters, including the previous values of the fields that
 * have a last_* counterpart. */
struct loopfilter {
  int filter_level;

  int sharpness_level;
  int last_sharpness_level;

  uint8_t mode_ref_delta_enabled;
  uint8_t mode_ref_delta_update;

  /* Per reference frame deltas, index order: 0 = Intra, Last, GF, ARF. */
  signed char ref_deltas[MAX_REF_LF_DELTAS];
  signed char last_ref_deltas[MAX_REF_LF_DELTAS];

  /* Per mode deltas, index order: 0 = ZERO_MV, MV. */
  signed char mode_deltas[MAX_MODE_LF_DELTAS];
  signed char last_mode_deltas[MAX_MODE_LF_DELTAS];
};
|
|
|
|
|
2012-10-28 18:38:23 +01:00
|
|
|
typedef struct macroblockd {
|
2013-04-23 17:26:10 +02:00
|
|
|
struct macroblockd_plane plane[MAX_MB_PLANE];
|
32x32 transform for superblocks.
This adds Debargha's DCT/DWT hybrid and a regular 32x32 DCT, and adds
code all over the place to wrap that in the bitstream/encoder/decoder/RD.
Some implementation notes (these probably need careful review):
- token range is extended by 1 bit, since the value range out of this
transform is [-16384,16383].
- the coefficients coming out of the FDCT are manually scaled back by
1 bit, or else they won't fit in int16_t (they are 17 bits). Because
of this, the RD error scoring does not right-shift the MSE score by
two (unlike for 4x4/8x8/16x16).
- to compensate for this loss in precision, the quantizer is halved
also. This is currently a little hacky.
- FDCT and IDCT is double-only right now. Needs a fixed-point impl.
- There are no default probabilities for the 32x32 transform yet; I'm
simply using the 16x16 luma ones. A future commit will add newly
generated probabilities for all transforms.
- No ADST version. I don't think we'll add one for this level; if an
ADST is desired, transform-size selection can scale back to 16x16
or lower, and use an ADST at that level.
Additional notes specific to Debargha's DWT/DCT hybrid:
- coefficient scale is different for the top/left 16x16 (DCT-over-DWT)
block than for the rest (DWT pixel differences) of the block. Therefore,
RD error scoring isn't easily scalable between coefficient and pixel
domain. Thus, unfortunately, we need to compute the RD distortion in
the pixel domain until we figure out how to scale these appropriately.
Change-Id: I00386f20f35d7fabb19aba94c8162f8aee64ef2b
2012-12-07 23:45:05 +01:00
|
|
|
|
2013-02-09 02:49:44 +01:00
|
|
|
struct scale_factors scale_factor[2];
|
2010-05-18 17:58:33 +02:00
|
|
|
|
2012-07-14 00:21:29 +02:00
|
|
|
MODE_INFO *prev_mode_info_context;
|
|
|
|
MODE_INFO *mode_info_context;
|
|
|
|
int mode_info_stride;
|
2010-05-18 17:58:33 +02:00
|
|
|
|
2012-07-14 00:21:29 +02:00
|
|
|
int up_available;
|
|
|
|
int left_available;
|
[WIP] Add column-based tiling.
This patch adds column-based tiling. The idea is to make each tile
independently decodable (after reading the common frame header) and
also independendly encodable (minus within-frame cost adjustments in
the RD loop) to speed-up hardware & software en/decoders if they used
multi-threading. Column-based tiling has the added advantage (over
other tiling methods) that it minimizes realtime use-case latency,
since all threads can start encoding data as soon as the first SB-row
worth of data is available to the encoder.
There is some test code that does random tile ordering in the decoder,
to confirm that each tile is indeed independently decodable from other
tiles in the same frame. At tile edges, all contexts assume default
values (i.e. 0, 0 motion vector, no coefficients, DC intra4x4 mode),
and motion vector search and ordering do not cross tiles in the same
frame.
t log
Tile independence is not maintained between frames ATM, i.e. tile 0 of
frame 1 is free to use motion vectors that point into any tile of frame
0. We support 1 (i.e. no tiling), 2 or 4 column-tiles.
The loopfilter crosses tile boundaries. I discussed this briefly with Aki
and he says that's OK. An in-loop loopfilter would need to do some sync
between tile threads, but that shouldn't be a big issue.
Resuls: with tiling disabled, we go up slightly because of improved edge
use in the intra4x4 prediction. With 2 tiles, we lose about ~1% on derf,
~0.35% on HD and ~0.55% on STD/HD. With 4 tiles, we lose another ~1.5%
on derf ~0.77% on HD and ~0.85% on STD/HD. Most of this loss is
concentrated in the low-bitrate end of clips, and most of it is because
of the loss of edges at tile boundaries and the resulting loss of intra
predictors.
TODO:
- more tiles (perhaps allow row-based tiling also, and max. 8 tiles)?
- maybe optionally (for EC purposes), motion vectors themselves
should not cross tile edges, or we should emulate such borders as
if they were off-frame, to limit error propagation to within one
tile only. This doesn't have to be the default behaviour but could
be an optional bitstream flag.
Change-Id: I5951c3a0742a767b20bc9fb5af685d9892c2c96f
2013-02-01 18:35:28 +01:00
|
|
|
int right_available;
|
2010-05-18 17:58:33 +02:00
|
|
|
|
2013-07-10 21:29:43 +02:00
|
|
|
struct segmentation seg;
|
2013-07-18 03:37:45 +02:00
|
|
|
struct loopfilter lf;
|
2013-07-10 21:29:43 +02:00
|
|
|
|
2013-04-23 19:12:18 +02:00
|
|
|
// partition contexts
|
|
|
|
PARTITION_CONTEXT *above_seg_context;
|
|
|
|
PARTITION_CONTEXT *left_seg_context;
|
|
|
|
|
2012-07-14 00:21:29 +02:00
|
|
|
/* Distance of MB away from frame edges */
|
|
|
|
int mb_to_left_edge;
|
|
|
|
int mb_to_right_edge;
|
|
|
|
int mb_to_top_edge;
|
|
|
|
int mb_to_bottom_edge;
|
|
|
|
|
2013-02-12 00:58:22 +01:00
|
|
|
int lossless;
|
2012-11-25 04:33:58 +01:00
|
|
|
/* Inverse transform function pointers. */
|
2013-05-20 19:03:17 +02:00
|
|
|
void (*inv_txm4x4_1_add)(int16_t *input, uint8_t *dest, int stride);
|
|
|
|
void (*inv_txm4x4_add)(int16_t *input, uint8_t *dest, int stride);
|
2013-04-22 23:53:07 +02:00
|
|
|
void (*itxm_add)(int16_t *input, uint8_t *dest, int stride, int eob);
|
2012-11-25 04:33:58 +01:00
|
|
|
|
Convert subpixel filters to use convolve framework
Update the code to call the new convolution functions to do subpixel
prediction rather than the existing functions. Remove the old C and
assembly code, since it is unused. This causes a 50% performance
reduction on the decoder, but that will be resolved when the asm for
the new functions is available.
There is no consensus for whether 6-tap or 2-tap predictors will be
supported in the final codec, so these filters are implemented in
terms of the 8-tap code, so that quality testing of these modes
can continue. Implementing the lower complexity algorithms is a
simple exercise, should it be necessary.
This code produces slightly better results in the EIGHTTAP_SMOOTH
case, since the filter is now applied in only one direction when
the subpel motion is only in one direction. Like the previous code,
the filtering is skipped entirely on full-pel MVs. This combination
seems to give the best quality gains, but this may be indicative of a
bug in the encoder's filter selection, since the encoder could
achieve the result of skipping the filtering on full-pel by selecting
one of the other filters. This should be revisited.
Quality gains on derf positive on almost all clips. The only clip
that seemed to be hurt at all datarates was football
(-0.115% PSNR average, -0.587% min). Overall averages 0.375% PSNR,
0.347% SSIM.
Change-Id: I7d469716091b1d89b4b08adde5863999319d69ff
2013-01-29 01:59:03 +01:00
|
|
|
struct subpix_fn_table subpix;
|
2012-11-25 04:33:58 +01:00
|
|
|
|
2012-07-14 00:21:29 +02:00
|
|
|
int allow_high_precision_mv;
|
2010-05-18 17:58:33 +02:00
|
|
|
|
2012-07-14 00:21:29 +02:00
|
|
|
int corrupted;
|
2010-12-16 16:46:31 +01:00
|
|
|
|
2013-07-11 17:18:34 +02:00
|
|
|
unsigned char sb_index; // index of 32x32 block inside the 64x64 block
|
|
|
|
unsigned char mb_index; // index of 16x16 block inside the 32x32 block
|
|
|
|
unsigned char b_index; // index of 8x8 block inside the 16x16 block
|
|
|
|
unsigned char ab_index; // index of 4x4 block inside the 8x8 block
|
|
|
|
|
2012-06-25 21:26:09 +02:00
|
|
|
int q_index;
|
|
|
|
|
2010-05-18 17:58:33 +02:00
|
|
|
} MACROBLOCKD;
|
|
|
|
|
2013-07-11 17:18:34 +02:00
|
|
|
/* Returns a pointer to the index field of |xd| that corresponds to the size
 * class of |subsize|:
 *   64x64, 64x32, 32x64, 32x32 -> sb_index
 *   32x16, 16x32, 16x16        -> mb_index
 *   16x8,  8x16,  8x8          -> b_index
 *   8x4,   4x8,   4x4          -> ab_index
 * Any other value trips an assertion and yields NULL. */
static INLINE unsigned char *get_sb_index(MACROBLOCKD *xd,
                                          BLOCK_SIZE_TYPE subsize) {
  unsigned char *index_ptr = NULL;

  switch (subsize) {
    case BLOCK_SIZE_SB64X64:
    case BLOCK_SIZE_SB64X32:
    case BLOCK_SIZE_SB32X64:
    case BLOCK_SIZE_SB32X32:
      index_ptr = &xd->sb_index;
      break;

    case BLOCK_SIZE_SB32X16:
    case BLOCK_SIZE_SB16X32:
    case BLOCK_SIZE_MB16X16:
      index_ptr = &xd->mb_index;
      break;

    case BLOCK_SIZE_SB16X8:
    case BLOCK_SIZE_SB8X16:
    case BLOCK_SIZE_SB8X8:
      index_ptr = &xd->b_index;
      break;

    case BLOCK_SIZE_SB8X4:
    case BLOCK_SIZE_SB4X8:
    case BLOCK_SIZE_AB4X4:
      index_ptr = &xd->ab_index;
      break;

    default:
      assert(0);
      break;
  }

  return index_ptr;
}
|
|
|
|
|
2013-04-23 19:12:18 +02:00
|
|
|
static INLINE void update_partition_context(MACROBLOCKD *xd,
|
|
|
|
BLOCK_SIZE_TYPE sb_type,
|
|
|
|
BLOCK_SIZE_TYPE sb_size) {
|
2013-06-29 03:07:37 +02:00
|
|
|
const int bsl = b_width_log2(sb_size), bs = (1 << bsl) / 2;
|
|
|
|
const int bwl = b_width_log2(sb_type);
|
|
|
|
const int bhl = b_height_log2(sb_type);
|
|
|
|
const int boffset = b_width_log2(BLOCK_SIZE_SB64X64) - bsl;
|
|
|
|
const char pcval0 = ~(0xe << boffset);
|
|
|
|
const char pcval1 = ~(0xf << boffset);
|
|
|
|
const char pcvalue[2] = {pcval0, pcval1};
|
2013-06-26 20:50:14 +02:00
|
|
|
|
|
|
|
assert(MAX(bwl, bhl) <= bsl);
|
2013-05-11 02:06:37 +02:00
|
|
|
|
2013-04-23 19:12:18 +02:00
|
|
|
// update the partition context at the end notes. set partition bits
|
|
|
|
// of block sizes larger than the current one to be one, and partition
|
|
|
|
// bits of smaller block sizes to be zero.
|
2013-06-26 20:50:14 +02:00
|
|
|
vpx_memset(xd->above_seg_context, pcvalue[bwl == bsl], bs);
|
|
|
|
vpx_memset(xd->left_seg_context, pcvalue[bhl == bsl], bs);
|
2013-04-23 19:12:18 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
static INLINE int partition_plane_context(MACROBLOCKD *xd,
|
|
|
|
BLOCK_SIZE_TYPE sb_type) {
|
2013-05-08 00:36:30 +02:00
|
|
|
int bsl = mi_width_log2(sb_type), bs = 1 << bsl;
|
2013-04-23 19:12:18 +02:00
|
|
|
int above = 0, left = 0, i;
|
2013-05-01 01:13:20 +02:00
|
|
|
int boffset = mi_width_log2(BLOCK_SIZE_SB64X64) - bsl;
|
2013-04-23 19:12:18 +02:00
|
|
|
|
2013-04-26 20:57:17 +02:00
|
|
|
assert(mi_width_log2(sb_type) == mi_height_log2(sb_type));
|
2013-04-23 19:12:18 +02:00
|
|
|
assert(bsl >= 0);
|
|
|
|
assert(boffset >= 0);
|
|
|
|
|
|
|
|
for (i = 0; i < bs; i++)
|
|
|
|
above |= (xd->above_seg_context[i] & (1 << boffset));
|
|
|
|
for (i = 0; i < bs; i++)
|
|
|
|
left |= (xd->left_seg_context[i] & (1 << boffset));
|
|
|
|
|
|
|
|
above = (above > 0);
|
|
|
|
left = (left > 0);
|
|
|
|
|
2013-05-11 02:06:37 +02:00
|
|
|
return (left * 2 + above) + bsl * PARTITION_PLOFFSET;
|
2013-04-23 19:12:18 +02:00
|
|
|
}
|
|
|
|
|
2013-05-01 18:43:59 +02:00
|
|
|
static BLOCK_SIZE_TYPE get_subsize(BLOCK_SIZE_TYPE bsize,
|
|
|
|
PARTITION_TYPE partition) {
|
2013-07-11 01:51:07 +02:00
|
|
|
BLOCK_SIZE_TYPE subsize = subsize_lookup[partition][bsize];
|
|
|
|
assert(subsize != BLOCK_SIZE_TYPES);
|
2013-05-01 18:43:59 +02:00
|
|
|
return subsize;
|
|
|
|
}
|
|
|
|
|
2013-06-26 03:15:42 +02:00
|
|
|
extern const TX_TYPE mode2txfm_map[MB_MODE_COUNT];
|
2012-08-02 18:07:33 +02:00
|
|
|
|
2013-07-24 21:55:45 +02:00
|
|
|
/* Selects the transform type for 4x4 block |ib| of the current block.
 *
 * DCT_DCT is returned unless the plane is PLANE_TYPE_Y_WITH_DC, the block is
 * not lossless and its first reference frame is INTRA_FRAME.  Otherwise the
 * type comes from mode2txfm_map, keyed by the sub-block mode bmi[ib] when
 * sb_type is below BLOCK_SIZE_SB8X8 and by the block mode otherwise. */
static INLINE TX_TYPE get_tx_type_4x4(PLANE_TYPE plane_type,
                                      const MACROBLOCKD *xd, int ib) {
  const MODE_INFO *const mi = xd->mode_info_context;
  const MB_MODE_INFO *const mbmi = &mi->mbmi;

  if (plane_type != PLANE_TYPE_Y_WITH_DC)
    return DCT_DCT;
  if (xd->lossless)
    return DCT_DCT;
  if (mbmi->ref_frame[0] != INTRA_FRAME)
    return DCT_DCT;

  if (mbmi->sb_type < BLOCK_SIZE_SB8X8)
    return mode2txfm_map[mi->bmi[ib].as_mode];

  return mode2txfm_map[mbmi->mode];
}
|
|
|
|
|
2013-07-24 21:55:45 +02:00
|
|
|
/* Selects the 8x8 transform type: mode2txfm_map[block mode] for
 * PLANE_TYPE_Y_WITH_DC, DCT_DCT for every other plane type. */
static INLINE TX_TYPE get_tx_type_8x8(PLANE_TYPE plane_type,
                                      const MACROBLOCKD *xd) {
  if (plane_type == PLANE_TYPE_Y_WITH_DC)
    return mode2txfm_map[xd->mode_info_context->mbmi.mode];

  return DCT_DCT;
}
|
2012-09-21 23:20:15 +02:00
|
|
|
|
2013-07-24 21:55:45 +02:00
|
|
|
/* Selects the 16x16 transform type: mode2txfm_map[block mode] for
 * PLANE_TYPE_Y_WITH_DC, DCT_DCT for every other plane type. */
static INLINE TX_TYPE get_tx_type_16x16(PLANE_TYPE plane_type,
                                        const MACROBLOCKD *xd) {
  if (plane_type == PLANE_TYPE_Y_WITH_DC)
    return mode2txfm_map[xd->mode_info_context->mbmi.mode];

  return DCT_DCT;
}
|
|
|
|
|
2013-07-02 02:28:08 +02:00
|
|
|
static void setup_block_dptrs(MACROBLOCKD *xd, int ss_x, int ss_y) {
|
|
|
|
int i;
|
|
|
|
|
|
|
|
for (i = 0; i < MAX_MB_PLANE; i++) {
|
|
|
|
xd->plane[i].plane_type = i ? PLANE_TYPE_UV : PLANE_TYPE_Y_WITH_DC;
|
|
|
|
xd->plane[i].subsampling_x = i ? ss_x : 0;
|
|
|
|
xd->plane[i].subsampling_y = i ? ss_y : 0;
|
|
|
|
}
|
|
|
|
#if CONFIG_ALPHA
|
|
|
|
// TODO(jkoleszar): Using the Y w/h for now
|
2013-07-09 01:01:01 +02:00
|
|
|
xd->plane[3].subsampling_x = 0;
|
|
|
|
xd->plane[3].subsampling_y = 0;
|
2013-07-02 02:28:08 +02:00
|
|
|
#endif
|
|
|
|
}
|
|
|
|
|
2010-05-18 17:58:33 +02:00
|
|
|
|
2013-07-11 01:51:07 +02:00
|
|
|
/* Returns the transform size used for the non-luma planes: the block's
 * txfm_size, capped at max_uv_txsize_lookup[sb_type]. */
static INLINE TX_SIZE get_uv_tx_size(const MB_MODE_INFO *mbmi) {
  return MIN(mbmi->txfm_size,
             max_uv_txsize_lookup[mbmi->sb_type]);
}
|
2013-03-26 23:23:30 +01:00
|
|
|
|
2013-04-02 23:50:40 +02:00
|
|
|
/* Result of plane_block_idx(): a plane number and the block index inside
 * that plane. */
struct plane_block_idx {
  int plane;  /* plane number, 0..2 */
  int block;  /* block index relative to the start of that plane */
};
|
|
|
|
|
|
|
|
// TODO(jkoleszar): returning a struct so it can be used in a const context,
|
|
|
|
// expect to refactor this further later.
|
2013-04-04 21:03:27 +02:00
|
|
|
static INLINE struct plane_block_idx plane_block_idx(int y_blocks,
|
2013-03-28 18:42:23 +01:00
|
|
|
int b_idx) {
|
2013-04-04 21:03:27 +02:00
|
|
|
const int v_offset = y_blocks * 5 / 4;
|
2013-04-02 23:50:40 +02:00
|
|
|
struct plane_block_idx res;
|
|
|
|
|
2013-04-04 21:03:27 +02:00
|
|
|
if (b_idx < y_blocks) {
|
2013-04-02 23:50:40 +02:00
|
|
|
res.plane = 0;
|
|
|
|
res.block = b_idx;
|
|
|
|
} else if (b_idx < v_offset) {
|
|
|
|
res.plane = 1;
|
2013-04-04 21:03:27 +02:00
|
|
|
res.block = b_idx - y_blocks;
|
2013-04-02 23:50:40 +02:00
|
|
|
} else {
|
2013-04-04 21:03:27 +02:00
|
|
|
assert(b_idx < y_blocks * 3 / 2);
|
2013-04-02 23:50:40 +02:00
|
|
|
res.plane = 2;
|
|
|
|
res.block = b_idx - v_offset;
|
|
|
|
}
|
|
|
|
return res;
|
|
|
|
}
|
|
|
|
|
2013-05-31 21:30:32 +02:00
|
|
|
/* Width of a |bsize| block in |plane|, after horizontal subsampling:
 * 4 << (b_width_log2(bsize) - subsampling_x). */
static INLINE int plane_block_width(BLOCK_SIZE_TYPE bsize,
                                    const struct macroblockd_plane* plane) {
  const int width_log2 = b_width_log2(bsize) - plane->subsampling_x;

  return 4 << width_log2;
}
|
|
|
|
|
|
|
|
/* Height of a |bsize| block in |plane|, after vertical subsampling:
 * 4 << (b_height_log2(bsize) - subsampling_y). */
static INLINE int plane_block_height(BLOCK_SIZE_TYPE bsize,
                                     const struct macroblockd_plane* plane) {
  const int height_log2 = b_height_log2(bsize) - plane->subsampling_y;

  return 4 << height_log2;
}
|
|
|
|
|
2013-07-11 01:51:07 +02:00
|
|
|
static INLINE int plane_block_width_log2by4(
|
|
|
|
BLOCK_SIZE_TYPE bsize, const struct macroblockd_plane* plane) {
|
|
|
|
return (b_width_log2(bsize) - plane->subsampling_x);
|
|
|
|
}
|
|
|
|
|
|
|
|
static INLINE int plane_block_height_log2by4(
|
|
|
|
BLOCK_SIZE_TYPE bsize, const struct macroblockd_plane* plane) {
|
|
|
|
return (b_height_log2(bsize) - plane->subsampling_y);
|
|
|
|
}
|
|
|
|
|
2013-04-09 19:15:10 +02:00
|
|
|
typedef void (*foreach_transformed_block_visitor)(int plane, int block,
|
2013-04-12 23:12:05 +02:00
|
|
|
BLOCK_SIZE_TYPE bsize,
|
2013-04-09 19:15:10 +02:00
|
|
|
int ss_txfrm_size,
|
|
|
|
void *arg);
|
2013-06-06 06:14:14 +02:00
|
|
|
|
2013-04-09 19:15:10 +02:00
|
|
|
static INLINE void foreach_transformed_block_in_plane(
|
2013-04-12 23:12:05 +02:00
|
|
|
const MACROBLOCKD* const xd, BLOCK_SIZE_TYPE bsize, int plane,
|
2013-05-01 01:13:20 +02:00
|
|
|
foreach_transformed_block_visitor visit, void *arg) {
|
2013-04-12 23:12:05 +02:00
|
|
|
const int bw = b_width_log2(bsize), bh = b_height_log2(bsize);
|
|
|
|
|
2013-04-09 19:15:10 +02:00
|
|
|
// block and transform sizes, in number of 4x4 blocks log 2 ("*_b")
|
|
|
|
// 4x4=0, 8x8=2, 16x16=4, 32x32=6, 64x64=8
|
2013-05-07 23:44:12 +02:00
|
|
|
// transform size varies per plane, look it up in a common way.
|
2013-06-10 15:48:58 +02:00
|
|
|
const MB_MODE_INFO* mbmi = &xd->mode_info_context->mbmi;
|
|
|
|
const TX_SIZE tx_size = plane ? get_uv_tx_size(mbmi)
|
|
|
|
: mbmi->txfm_size;
|
2013-04-12 23:12:05 +02:00
|
|
|
const int block_size_b = bw + bh;
|
2013-04-09 19:15:10 +02:00
|
|
|
const int txfrm_size_b = tx_size * 2;
|
|
|
|
|
|
|
|
// subsampled size of the block
|
2013-06-06 15:07:09 +02:00
|
|
|
const int ss_sum = xd->plane[plane].subsampling_x
|
|
|
|
+ xd->plane[plane].subsampling_y;
|
2013-04-09 19:15:10 +02:00
|
|
|
const int ss_block_size = block_size_b - ss_sum;
|
|
|
|
|
2013-05-07 23:44:12 +02:00
|
|
|
const int step = 1 << txfrm_size_b;
|
2013-04-09 19:15:10 +02:00
|
|
|
|
|
|
|
int i;
|
|
|
|
|
|
|
|
assert(txfrm_size_b <= block_size_b);
|
2013-05-07 23:44:12 +02:00
|
|
|
assert(txfrm_size_b <= ss_block_size);
|
2013-06-06 15:07:09 +02:00
|
|
|
|
|
|
|
// If mb_to_right_edge is < 0 we are in a situation in which
|
|
|
|
// the current block size extends into the UMV and we won't
|
|
|
|
// visit the sub blocks that are wholly within the UMV.
|
|
|
|
if (xd->mb_to_right_edge < 0 || xd->mb_to_bottom_edge < 0) {
|
|
|
|
int r, c;
|
|
|
|
const int sw = bw - xd->plane[plane].subsampling_x;
|
|
|
|
const int sh = bh - xd->plane[plane].subsampling_y;
|
|
|
|
int max_blocks_wide = 1 << sw;
|
|
|
|
int max_blocks_high = 1 << sh;
|
|
|
|
|
|
|
|
// xd->mb_to_right_edge is in units of pixels * 8. This converts
|
|
|
|
// it to 4x4 block sizes.
|
|
|
|
if (xd->mb_to_right_edge < 0)
|
|
|
|
max_blocks_wide +=
|
2013-07-30 19:16:03 +02:00
|
|
|
(xd->mb_to_right_edge >> (5 + xd->plane[plane].subsampling_x));
|
2013-06-06 15:07:09 +02:00
|
|
|
|
|
|
|
if (xd->mb_to_bottom_edge < 0)
|
|
|
|
max_blocks_high +=
|
2013-07-30 19:16:03 +02:00
|
|
|
(xd->mb_to_bottom_edge >> (5 + xd->plane[plane].subsampling_y));
|
2013-06-06 15:07:09 +02:00
|
|
|
|
|
|
|
i = 0;
|
|
|
|
// Unlike the normal case - in here we have to keep track of the
|
|
|
|
// row and column of the blocks we use so that we know if we are in
|
2013-07-30 19:16:03 +02:00
|
|
|
// the unrestricted motion border.
|
2013-06-06 15:07:09 +02:00
|
|
|
for (r = 0; r < (1 << sh); r += (1 << tx_size)) {
|
|
|
|
for (c = 0; c < (1 << sw); c += (1 << tx_size)) {
|
|
|
|
if (r < max_blocks_high && c < max_blocks_wide)
|
|
|
|
visit(plane, i, bsize, txfrm_size_b, arg);
|
|
|
|
i += step;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
} else {
|
|
|
|
for (i = 0; i < (1 << ss_block_size); i += step) {
|
|
|
|
visit(plane, i, bsize, txfrm_size_b, arg);
|
|
|
|
}
|
2013-04-09 19:15:10 +02:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
static INLINE void foreach_transformed_block(
|
2013-04-12 23:12:05 +02:00
|
|
|
const MACROBLOCKD* const xd, BLOCK_SIZE_TYPE bsize,
|
2013-04-09 19:15:10 +02:00
|
|
|
foreach_transformed_block_visitor visit, void *arg) {
|
|
|
|
int plane;
|
|
|
|
|
|
|
|
for (plane = 0; plane < MAX_MB_PLANE; plane++) {
|
2013-05-01 01:13:20 +02:00
|
|
|
foreach_transformed_block_in_plane(xd, bsize, plane,
|
2013-04-09 19:15:10 +02:00
|
|
|
visit, arg);
|
|
|
|
}
|
|
|
|
}
|
2013-04-02 23:50:40 +02:00
|
|
|
|
2013-04-11 20:14:31 +02:00
|
|
|
static INLINE void foreach_transformed_block_uv(
|
2013-04-12 23:12:05 +02:00
|
|
|
const MACROBLOCKD* const xd, BLOCK_SIZE_TYPE bsize,
|
2013-04-11 20:14:31 +02:00
|
|
|
foreach_transformed_block_visitor visit, void *arg) {
|
|
|
|
int plane;
|
|
|
|
|
|
|
|
for (plane = 1; plane < MAX_MB_PLANE; plane++) {
|
2013-05-01 01:13:20 +02:00
|
|
|
foreach_transformed_block_in_plane(xd, bsize, plane,
|
2013-04-11 20:14:31 +02:00
|
|
|
visit, arg);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
make build_inter_predictors block size agnostic (luma)
This commit converts the luma versions of vp9_build_inter_predictors_sb
to use a common function. Update the convolution functions to support
block sizes larger than 16x16, and add a foreach_predicted_block walker.
Next step will be to calculate the UV motion vector and implement SBUV,
then fold in vp9_build_inter16x16_predictors_mb and SPLITMV.
At the 16x16, 32x32, and 64x64 levels implemented in this commit, each
plane is predicted with only a single call to vp9_build_inter_predictor.
This is not yet called for SPLITMV. If the notion of SPLITMV/I8X8/I4X4
goes away, then the prediction block walker can go away, since we'll
always predict the whole bsize in a single step. Implemented using a
block walker at this stage for SPLITMV, as a 4x4 "prediction block size"
within the BLOCK_SIZE_MB16X16 macroblock. It would also support other
rectangular sizes too, if the blocks smaller than 16x16 remain
implemented as a SPLITMV-like thing. Just using 4x4 for now.
There's also a potential to combine with the foreach_transformed_block
walker if the logic for calculating the size of the subsampled
transform is made more straightforward, perhaps as a consequence of
supporting smaller macroblocks than 16x16. Will watch what happens there.
Change-Id: Iddd9973398542216601b630c628b9b7fdee33fe2
2013-04-13 02:19:57 +02:00
|
|
|
// TODO(jkoleszar): In principle, pred_w, pred_h are unnecessary, as we could
|
|
|
|
// calculate the subsampled BLOCK_SIZE_TYPE, but that type isn't defined for
|
|
|
|
// sizes smaller than 16x16 yet.
|
|
|
|
typedef void (*foreach_predicted_block_visitor)(int plane, int block,
|
|
|
|
BLOCK_SIZE_TYPE bsize,
|
|
|
|
int pred_w, int pred_h,
|
|
|
|
void *arg);
|
|
|
|
static INLINE void foreach_predicted_block_in_plane(
|
|
|
|
const MACROBLOCKD* const xd, BLOCK_SIZE_TYPE bsize, int plane,
|
|
|
|
foreach_predicted_block_visitor visit, void *arg) {
|
2013-04-17 22:41:18 +02:00
|
|
|
int i, x, y;
|
make buid_inter_predictors block size agnostic (luma)
This commit converts the luma versions of vp9_build_inter_predictors_sb
to use a common function. Update the convolution functions to support
block sizes larger than 16x16, and add a foreach_predicted_block walker.
Next step will be to calculate the UV motion vector and implement SBUV,
then fold in vp9_build_inter16x16_predictors_mb and SPLITMV.
At the 16x16, 32x32, and 64x64 levels implemented in this commit, each
plane is predicted with only a single call to vp9_build_inter_predictor.
This is not yet called for SPLITMV. If the notion of SPLITMV/I8X8/I4X4
goes away, then the prediction block walker can go away, since we'll
always predict the whole bsize in a single step. Implemented using a
block walker at this stage for SPLITMV, as a 4x4 "prediction block size"
within the BLOCK_SIZE_MB16X16 macroblock. It would also support other
rectangular sizes too, if the blocks smaller than 16x16 remain
implemented as a SPLITMV-like thing. Just using 4x4 for now.
There's also a potential to combine with the foreach_transformed_block
walker if the logic for calculating the size of the subsampled
transform is made more straightforward, perhaps as a consequence of
supporing smaller macroblocks than 16x16. Will watch what happens there.
Change-Id: Iddd9973398542216601b630c628b9b7fdee33fe2
2013-04-13 02:19:57 +02:00
|
|
|
|
|
|
|
// block sizes in number of 4x4 blocks log 2 ("*_b")
|
|
|
|
// 4x4=0, 8x8=2, 16x16=4, 32x32=6, 64x64=8
|
|
|
|
// subsampled size of the block
|
2013-05-31 21:30:32 +02:00
|
|
|
const int bwl = b_width_log2(bsize) - xd->plane[plane].subsampling_x;
|
|
|
|
const int bhl = b_height_log2(bsize) - xd->plane[plane].subsampling_y;
|
make buid_inter_predictors block size agnostic (luma)
This commit converts the luma versions of vp9_build_inter_predictors_sb
to use a common function. Update the convolution functions to support
block sizes larger than 16x16, and add a foreach_predicted_block walker.
Next step will be to calculate the UV motion vector and implement SBUV,
then fold in vp9_build_inter16x16_predictors_mb and SPLITMV.
At the 16x16, 32x32, and 64x64 levels implemented in this commit, each
plane is predicted with only a single call to vp9_build_inter_predictor.
This is not yet called for SPLITMV. If the notion of SPLITMV/I8X8/I4X4
goes away, then the prediction block walker can go away, since we'll
always predict the whole bsize in a single step. Implemented using a
block walker at this stage for SPLITMV, as a 4x4 "prediction block size"
within the BLOCK_SIZE_MB16X16 macroblock. It would also support other
rectangular sizes too, if the blocks smaller than 16x16 remain
implemented as a SPLITMV-like thing. Just using 4x4 for now.
There's also a potential to combine with the foreach_transformed_block
walker if the logic for calculating the size of the subsampled
transform is made more straightforward, perhaps as a consequence of
supporing smaller macroblocks than 16x16. Will watch what happens there.
Change-Id: Iddd9973398542216601b630c628b9b7fdee33fe2
2013-04-13 02:19:57 +02:00
|
|
|
|
|
|
|
// size of the predictor to use.
|
2013-04-17 22:41:18 +02:00
|
|
|
int pred_w, pred_h;
|
|
|
|
|
2013-05-30 21:49:38 +02:00
|
|
|
if (xd->mode_info_context->mbmi.sb_type < BLOCK_SIZE_SB8X8) {
|
|
|
|
assert(bsize == BLOCK_SIZE_SB8X8);
|
2013-05-01 01:13:20 +02:00
|
|
|
pred_w = 0;
|
|
|
|
pred_h = 0;
|
2013-04-17 22:41:18 +02:00
|
|
|
} else {
|
2013-05-31 21:30:32 +02:00
|
|
|
pred_w = bwl;
|
|
|
|
pred_h = bhl;
|
2013-04-17 22:41:18 +02:00
|
|
|
}
|
2013-05-31 21:30:32 +02:00
|
|
|
assert(pred_w <= bwl);
|
|
|
|
assert(pred_h <= bhl);
|
2013-04-17 22:41:18 +02:00
|
|
|
|
|
|
|
// visit each subblock in raster order
|
|
|
|
i = 0;
|
2013-05-31 21:30:32 +02:00
|
|
|
for (y = 0; y < 1 << bhl; y += 1 << pred_h) {
|
|
|
|
for (x = 0; x < 1 << bwl; x += 1 << pred_w) {
|
2013-04-17 22:41:18 +02:00
|
|
|
visit(plane, i, bsize, pred_w, pred_h, arg);
|
|
|
|
i += 1 << pred_w;
|
|
|
|
}
|
2013-05-31 21:30:32 +02:00
|
|
|
i += (1 << (bwl + pred_h)) - (1 << bwl);
|
make buid_inter_predictors block size agnostic (luma)
This commit converts the luma versions of vp9_build_inter_predictors_sb
to use a common function. Update the convolution functions to support
block sizes larger than 16x16, and add a foreach_predicted_block walker.
Next step will be to calculate the UV motion vector and implement SBUV,
then fold in vp9_build_inter16x16_predictors_mb and SPLITMV.
At the 16x16, 32x32, and 64x64 levels implemented in this commit, each
plane is predicted with only a single call to vp9_build_inter_predictor.
This is not yet called for SPLITMV. If the notion of SPLITMV/I8X8/I4X4
goes away, then the prediction block walker can go away, since we'll
always predict the whole bsize in a single step. Implemented using a
block walker at this stage for SPLITMV, as a 4x4 "prediction block size"
within the BLOCK_SIZE_MB16X16 macroblock. It would also support other
rectangular sizes too, if the blocks smaller than 16x16 remain
implemented as a SPLITMV-like thing. Just using 4x4 for now.
There's also a potential to combine with the foreach_transformed_block
walker if the logic for calculating the size of the subsampled
transform is made more straightforward, perhaps as a consequence of
supporing smaller macroblocks than 16x16. Will watch what happens there.
Change-Id: Iddd9973398542216601b630c628b9b7fdee33fe2
2013-04-13 02:19:57 +02:00
|
|
|
}
|
|
|
|
}
|
|
|
|
static INLINE void foreach_predicted_block(
|
|
|
|
const MACROBLOCKD* const xd, BLOCK_SIZE_TYPE bsize,
|
|
|
|
foreach_predicted_block_visitor visit, void *arg) {
|
|
|
|
int plane;
|
|
|
|
|
|
|
|
for (plane = 0; plane < MAX_MB_PLANE; plane++) {
|
|
|
|
foreach_predicted_block_in_plane(xd, bsize, plane, visit, arg);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
static INLINE void foreach_predicted_block_uv(
|
|
|
|
const MACROBLOCKD* const xd, BLOCK_SIZE_TYPE bsize,
|
|
|
|
foreach_predicted_block_visitor visit, void *arg) {
|
|
|
|
int plane;
|
|
|
|
|
|
|
|
for (plane = 1; plane < MAX_MB_PLANE; plane++) {
|
|
|
|
foreach_predicted_block_in_plane(xd, bsize, plane, visit, arg);
|
|
|
|
}
|
|
|
|
}
|
2013-04-23 17:26:10 +02:00
|
|
|
// Converts a raster-order 4x4 block index within |plane| into an element
// offset for a buffer with the given |stride|. The block's row and column
// are derived from the subsampled block width and then scaled by 4, the
// edge length of a 4x4 block.
static int raster_block_offset(MACROBLOCKD *xd, BLOCK_SIZE_TYPE bsize,
                               int plane, int block, int stride) {
  // log2 of the number of 4x4 blocks per row in this plane.
  const int cols_log2 =
      b_width_log2(bsize) - xd->plane[plane].subsampling_x;
  const int col_mask = (1 << cols_log2) - 1;
  const int row = block >> cols_log2;
  const int col = block & col_mask;

  return 4 * row * stride + 4 * col;
}
|
|
|
|
static int16_t* raster_block_offset_int16(MACROBLOCKD *xd,
|
|
|
|
BLOCK_SIZE_TYPE bsize,
|
|
|
|
int plane, int block, int16_t *base) {
|
2013-05-31 21:30:32 +02:00
|
|
|
const int stride = plane_block_width(bsize, &xd->plane[plane]);
|
2013-04-24 01:22:47 +02:00
|
|
|
return base + raster_block_offset(xd, bsize, plane, block, stride);
|
|
|
|
}
|
|
|
|
static uint8_t* raster_block_offset_uint8(MACROBLOCKD *xd,
|
|
|
|
BLOCK_SIZE_TYPE bsize,
|
|
|
|
int plane, int block,
|
|
|
|
uint8_t *base, int stride) {
|
|
|
|
return base + raster_block_offset(xd, bsize, plane, block, stride);
|
2013-04-23 17:26:10 +02:00
|
|
|
}
|
make build_inter_predictors block size agnostic (luma)
This commit converts the luma versions of vp9_build_inter_predictors_sb
to use a common function. Update the convolution functions to support
block sizes larger than 16x16, and add a foreach_predicted_block walker.
Next step will be to calculate the UV motion vector and implement SBUV,
then fold in vp9_build_inter16x16_predictors_mb and SPLITMV.
At the 16x16, 32x32, and 64x64 levels implemented in this commit, each
plane is predicted with only a single call to vp9_build_inter_predictor.
This is not yet called for SPLITMV. If the notion of SPLITMV/I8X8/I4X4
goes away, then the prediction block walker can go away, since we'll
always predict the whole bsize in a single step. Implemented using a
block walker at this stage for SPLITMV, as a 4x4 "prediction block size"
within the BLOCK_SIZE_MB16X16 macroblock. It would also support other
rectangular sizes too, if the blocks smaller than 16x16 remain
implemented as a SPLITMV-like thing. Just using 4x4 for now.
There's also a potential to combine with the foreach_transformed_block
walker if the logic for calculating the size of the subsampled
transform is made more straightforward, perhaps as a consequence of
supporting smaller macroblocks than 16x16. Will watch what happens there.
Change-Id: Iddd9973398542216601b630c628b9b7fdee33fe2
2013-04-13 02:19:57 +02:00
|
|
|
|
2013-04-30 18:54:51 +02:00
|
|
|
static int txfrm_block_to_raster_block(MACROBLOCKD *xd,
|
|
|
|
BLOCK_SIZE_TYPE bsize,
|
|
|
|
int plane, int block,
|
|
|
|
int ss_txfrm_size) {
|
|
|
|
const int bwl = b_width_log2(bsize) - xd->plane[plane].subsampling_x;
|
|
|
|
const int txwl = ss_txfrm_size / 2;
|
2013-07-12 20:37:43 +02:00
|
|
|
const int tx_cols_log2 = bwl - txwl;
|
|
|
|
const int tx_cols = 1 << tx_cols_log2;
|
2013-04-30 18:54:51 +02:00
|
|
|
const int raster_mb = block >> ss_txfrm_size;
|
|
|
|
const int x = (raster_mb & (tx_cols - 1)) << (txwl);
|
2013-07-12 20:37:43 +02:00
|
|
|
const int y = raster_mb >> tx_cols_log2 << (txwl);
|
2013-04-30 18:54:51 +02:00
|
|
|
return x + (y << bwl);
|
|
|
|
}
|
|
|
|
|
|
|
|
static void txfrm_block_to_raster_xy(MACROBLOCKD *xd,
|
|
|
|
BLOCK_SIZE_TYPE bsize,
|
|
|
|
int plane, int block,
|
|
|
|
int ss_txfrm_size,
|
|
|
|
int *x, int *y) {
|
|
|
|
const int bwl = b_width_log2(bsize) - xd->plane[plane].subsampling_x;
|
|
|
|
const int txwl = ss_txfrm_size / 2;
|
2013-07-12 20:37:43 +02:00
|
|
|
const int tx_cols_log2 = bwl - txwl;
|
|
|
|
const int tx_cols = 1 << tx_cols_log2;
|
2013-04-30 18:54:51 +02:00
|
|
|
const int raster_mb = block >> ss_txfrm_size;
|
|
|
|
*x = (raster_mb & (tx_cols - 1)) << (txwl);
|
2013-07-12 20:37:43 +02:00
|
|
|
*y = raster_mb >> tx_cols_log2 << (txwl);
|
2013-04-30 18:54:51 +02:00
|
|
|
}
|
2013-06-06 15:07:09 +02:00
|
|
|
|
|
|
|
static void extend_for_intra(MACROBLOCKD* const xd, int plane, int block,
|
|
|
|
BLOCK_SIZE_TYPE bsize, int ss_txfrm_size) {
|
|
|
|
const int bw = plane_block_width(bsize, &xd->plane[plane]);
|
|
|
|
const int bh = plane_block_height(bsize, &xd->plane[plane]);
|
|
|
|
int x, y;
|
|
|
|
txfrm_block_to_raster_xy(xd, bsize, plane, block, ss_txfrm_size, &x, &y);
|
|
|
|
x = x * 4 - 1;
|
|
|
|
y = y * 4 - 1;
|
|
|
|
// Copy a pixel into the umv if we are in a situation where the block size
|
|
|
|
// extends into the UMV.
|
|
|
|
// TODO(JBB): Should be able to do the full extend in place so we don't have
|
|
|
|
// to do this multiple times.
|
|
|
|
if (xd->mb_to_right_edge < 0) {
|
|
|
|
int umv_border_start = bw
|
|
|
|
+ (xd->mb_to_right_edge >> (3 + xd->plane[plane].subsampling_x));
|
|
|
|
|
|
|
|
if (x + bw > umv_border_start)
|
|
|
|
vpx_memset(
|
|
|
|
xd->plane[plane].dst.buf + y * xd->plane[plane].dst.stride
|
|
|
|
+ umv_border_start,
|
|
|
|
*(xd->plane[plane].dst.buf + y * xd->plane[plane].dst.stride
|
|
|
|
+ umv_border_start - 1),
|
|
|
|
bw);
|
|
|
|
}
|
|
|
|
if (xd->mb_to_bottom_edge < 0) {
|
|
|
|
int umv_border_start = bh
|
|
|
|
+ (xd->mb_to_bottom_edge >> (3 + xd->plane[plane].subsampling_y));
|
|
|
|
int i;
|
|
|
|
uint8_t c = *(xd->plane[plane].dst.buf
|
|
|
|
+ (umv_border_start - 1) * xd->plane[plane].dst.stride + x);
|
|
|
|
|
|
|
|
uint8_t *d = xd->plane[plane].dst.buf
|
|
|
|
+ umv_border_start * xd->plane[plane].dst.stride + x;
|
|
|
|
|
|
|
|
if (y + bh > umv_border_start)
|
|
|
|
for (i = 0; i < bh; i++, d += xd->plane[plane].dst.stride)
|
|
|
|
*d = c;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
// Fills the above (|A|) and left (|L|) entropy contexts covered by one
// transform block that sits on the frame border.
//
// (1 << ss_tx_size) entries are written in each array. Entries that are
// still inside the frame receive (eob > 0); entries that fall outside the
// border are set to 0. |aoff| and |loff| are the transform block's column
// and row offsets, in 4x4 blocks, within the subsampled plane block.
static void set_contexts_on_border(MACROBLOCKD *xd, BLOCK_SIZE_TYPE bsize,
                                   int plane, int ss_tx_size, int eob, int aoff,
                                   int loff, ENTROPY_CONTEXT *A,
                                   ENTROPY_CONTEXT *L) {
  const int ss_x = xd->plane[plane].subsampling_x;
  const int ss_y = xd->plane[plane].subsampling_y;
  const int tx_blocks = 1 << ss_tx_size;
  const int has_coeffs = eob > 0;
  // In-frame extent of the subsampled plane block, in 4x4 blocks.
  int blocks_wide = 1 << (b_width_log2(bsize) - ss_x);
  int blocks_high = 1 << (b_height_log2(bsize) - ss_y);
  int above_n = tx_blocks;
  int left_n = tx_blocks;
  int idx;

  // xd->mb_to_right_edge / mb_to_bottom_edge are in units of pixels * 8.
  // The shifts convert them to 4x4 block units in this plane.
  if (xd->mb_to_right_edge < 0)
    blocks_wide += (xd->mb_to_right_edge >> (5 + ss_x));
  if (xd->mb_to_bottom_edge < 0)
    blocks_high += (xd->mb_to_bottom_edge >> (5 + ss_y));

  // Avoid copying the eob flag into contexts that are outside our border;
  // those entries are zeroed below instead.
  if (above_n + aoff > blocks_wide)
    above_n = blocks_wide - aoff;
  if (left_n + loff > blocks_high)
    left_n = blocks_high - loff;

  for (idx = 0; idx < above_n; ++idx)
    A[idx] = has_coeffs;
  for (idx = above_n; idx < tx_blocks; ++idx)
    A[idx] = 0;

  for (idx = 0; idx < left_n; ++idx)
    L[idx] = has_coeffs;
  for (idx = left_n; idx < tx_blocks; ++idx)
    L[idx] = 0;
}
|
|
|
|
|
|
|
|
|
2012-12-19 00:31:19 +01:00
|
|
|
#endif // VP9_COMMON_VP9_BLOCKD_H_
|